In [1]:
import numpy as np
import pandas as pd
import glob
import json

## Data Pre-processing

In [125]:
# read in csv file generated by Amazon API
path = r'C:\Users\movin\Documents\GitHub\PersonalityFromPhoto\data'
allFiles = glob.glob(path + '/*.csv')
df = pd.DataFrame()
list_ = []
for file_ in allFiles:
    df = pd.read_csv(file_, index_col=None, header=0)
    list_.append(df)
df = pd.concat(list_, ignore_index=True)

In [126]:
len(df.index)

15997

In [127]:
df.head()

Unnamed: 0.1,Unnamed: 0,fileName,Labels,FaceDetails
0,0,Facebook/ENFJ/1000006876806379.jpg,"[{'Name': 'Human', 'Confidence': 99.2957839965...",[{'BoundingBox': {'Width': 0.11999999731779099...
1,1,Facebook/ENFJ/1001091583356351.jpg,"[{'Name': 'Art', 'Confidence': 52.856147766113...",[]
2,2,Facebook/ENFJ/1001328126675771.jpg,"[{'Name': 'People', 'Confidence': 99.012985229...","[{'BoundingBox': {'Width': 0.4146634638309479,..."
3,3,Facebook/ENFJ/10100163853254202.jpg,"[{'Name': 'Art', 'Confidence': 91.370552062988...",[]
4,4,Facebook/ENFJ/10100238407492499.jpg,"[{'Name': 'Human', 'Confidence': 99.3081130981...","[{'BoundingBox': {'Width': 0.6088888645172119,..."


In [128]:
# rename the unnamed column
df = df.rename(columns={'Unnamed: 0': 'GroupIndex'})

In [129]:
# add an index column
df['index'] = np.arange(len(df.index))

In [130]:
cols = df.columns.tolist()
cols

['GroupIndex', 'fileName', 'Labels', 'FaceDetails', 'index']

In [131]:
cols = cols[-1:] + cols[:-1]
cols

['index', 'GroupIndex', 'fileName', 'Labels', 'FaceDetails']

In [132]:
df = df[cols]

In [133]:
df.head()

Unnamed: 0,index,GroupIndex,fileName,Labels,FaceDetails
0,0,0,Facebook/ENFJ/1000006876806379.jpg,"[{'Name': 'Human', 'Confidence': 99.2957839965...",[{'BoundingBox': {'Width': 0.11999999731779099...
1,1,1,Facebook/ENFJ/1001091583356351.jpg,"[{'Name': 'Art', 'Confidence': 52.856147766113...",[]
2,2,2,Facebook/ENFJ/1001328126675771.jpg,"[{'Name': 'People', 'Confidence': 99.012985229...","[{'BoundingBox': {'Width': 0.4146634638309479,..."
3,3,3,Facebook/ENFJ/10100163853254202.jpg,"[{'Name': 'Art', 'Confidence': 91.370552062988...",[]
4,4,4,Facebook/ENFJ/10100238407492499.jpg,"[{'Name': 'Human', 'Confidence': 99.3081130981...","[{'BoundingBox': {'Width': 0.6088888645172119,..."


In [134]:
# get the personality for every photo
# get photo name as fileName

personality_list = []
photoName_list = []

for i in range(len(df.index)):
    personality_list.append(df['fileName'][i][9:13])
    photoName_list.append(df['fileName'][i][14:])

In [135]:
len(personality_list)

15997

In [136]:
len(photoName_list)

15997

In [137]:
# add the personality column to dataframe
se = pd.Series(personality_list)
df['Personality'] = se.values
df.head()

Unnamed: 0,index,GroupIndex,fileName,Labels,FaceDetails,Personality
0,0,0,Facebook/ENFJ/1000006876806379.jpg,"[{'Name': 'Human', 'Confidence': 99.2957839965...",[{'BoundingBox': {'Width': 0.11999999731779099...,ENFJ
1,1,1,Facebook/ENFJ/1001091583356351.jpg,"[{'Name': 'Art', 'Confidence': 52.856147766113...",[],ENFJ
2,2,2,Facebook/ENFJ/1001328126675771.jpg,"[{'Name': 'People', 'Confidence': 99.012985229...","[{'BoundingBox': {'Width': 0.4146634638309479,...",ENFJ
3,3,3,Facebook/ENFJ/10100163853254202.jpg,"[{'Name': 'Art', 'Confidence': 91.370552062988...",[],ENFJ
4,4,4,Facebook/ENFJ/10100238407492499.jpg,"[{'Name': 'Human', 'Confidence': 99.3081130981...","[{'BoundingBox': {'Width': 0.6088888645172119,...",ENFJ


In [138]:
# drop the fileName column

df = df.drop(['fileName'], axis=1)
df.head()

Unnamed: 0,index,GroupIndex,Labels,FaceDetails,Personality
0,0,0,"[{'Name': 'Human', 'Confidence': 99.2957839965...",[{'BoundingBox': {'Width': 0.11999999731779099...,ENFJ
1,1,1,"[{'Name': 'Art', 'Confidence': 52.856147766113...",[],ENFJ
2,2,2,"[{'Name': 'People', 'Confidence': 99.012985229...","[{'BoundingBox': {'Width': 0.4146634638309479,...",ENFJ
3,3,3,"[{'Name': 'Art', 'Confidence': 91.370552062988...",[],ENFJ
4,4,4,"[{'Name': 'Human', 'Confidence': 99.3081130981...","[{'BoundingBox': {'Width': 0.6088888645172119,...",ENFJ


In [139]:
# add PhotoName column

se = pd.Series(photoName_list)
df['PhotoName'] = se.values
df.head()

Unnamed: 0,index,GroupIndex,Labels,FaceDetails,Personality,PhotoName
0,0,0,"[{'Name': 'Human', 'Confidence': 99.2957839965...",[{'BoundingBox': {'Width': 0.11999999731779099...,ENFJ,1000006876806379.jpg
1,1,1,"[{'Name': 'Art', 'Confidence': 52.856147766113...",[],ENFJ,1001091583356351.jpg
2,2,2,"[{'Name': 'People', 'Confidence': 99.012985229...","[{'BoundingBox': {'Width': 0.4146634638309479,...",ENFJ,1001328126675771.jpg
3,3,3,"[{'Name': 'Art', 'Confidence': 91.370552062988...",[],ENFJ,10100163853254202.jpg
4,4,4,"[{'Name': 'Human', 'Confidence': 99.3081130981...","[{'BoundingBox': {'Width': 0.6088888645172119,...",ENFJ,10100238407492499.jpg


In [140]:
cols = df.columns.tolist()
cols

['index', 'GroupIndex', 'Labels', 'FaceDetails', 'Personality', 'PhotoName']

In [141]:
cols = cols[0:2] + cols[-1:] + cols[2:5]
cols

['index', 'GroupIndex', 'PhotoName', 'Labels', 'FaceDetails', 'Personality']

In [142]:
df = df[cols]
df.head()

Unnamed: 0,index,GroupIndex,PhotoName,Labels,FaceDetails,Personality
0,0,0,1000006876806379.jpg,"[{'Name': 'Human', 'Confidence': 99.2957839965...",[{'BoundingBox': {'Width': 0.11999999731779099...,ENFJ
1,1,1,1001091583356351.jpg,"[{'Name': 'Art', 'Confidence': 52.856147766113...",[],ENFJ
2,2,2,1001328126675771.jpg,"[{'Name': 'People', 'Confidence': 99.012985229...","[{'BoundingBox': {'Width': 0.4146634638309479,...",ENFJ
3,3,3,10100163853254202.jpg,"[{'Name': 'Art', 'Confidence': 91.370552062988...",[],ENFJ
4,4,4,10100238407492499.jpg,"[{'Name': 'Human', 'Confidence': 99.3081130981...","[{'BoundingBox': {'Width': 0.6088888645172119,...",ENFJ


In [143]:
# empty face details number
count = 0
for i in df['FaceDetails']:
    if len(i)==2:
        count += 1

In [144]:
# 3409 photo don't have face details returned
count

3409

In [145]:
# replace empty face detail with NaN
df = df.replace('[]', np.nan)

In [146]:
df.head()

Unnamed: 0,index,GroupIndex,PhotoName,Labels,FaceDetails,Personality
0,0,0,1000006876806379.jpg,"[{'Name': 'Human', 'Confidence': 99.2957839965...",[{'BoundingBox': {'Width': 0.11999999731779099...,ENFJ
1,1,1,1001091583356351.jpg,"[{'Name': 'Art', 'Confidence': 52.856147766113...",,ENFJ
2,2,2,1001328126675771.jpg,"[{'Name': 'People', 'Confidence': 99.012985229...","[{'BoundingBox': {'Width': 0.4146634638309479,...",ENFJ
3,3,3,10100163853254202.jpg,"[{'Name': 'Art', 'Confidence': 91.370552062988...",,ENFJ
4,4,4,10100238407492499.jpg,"[{'Name': 'Human', 'Confidence': 99.3081130981...","[{'BoundingBox': {'Width': 0.6088888645172119,...",ENFJ


In [147]:
# drop empty value
df = df.dropna()

In [148]:
df.head()

Unnamed: 0,index,GroupIndex,PhotoName,Labels,FaceDetails,Personality
0,0,0,1000006876806379.jpg,"[{'Name': 'Human', 'Confidence': 99.2957839965...",[{'BoundingBox': {'Width': 0.11999999731779099...,ENFJ
2,2,2,1001328126675771.jpg,"[{'Name': 'People', 'Confidence': 99.012985229...","[{'BoundingBox': {'Width': 0.4146634638309479,...",ENFJ
4,4,4,10100238407492499.jpg,"[{'Name': 'Human', 'Confidence': 99.3081130981...","[{'BoundingBox': {'Width': 0.6088888645172119,...",ENFJ
5,5,5,10100251167240940.jpg,"[{'Name': 'People', 'Confidence': 99.175323486...",[{'BoundingBox': {'Width': 0.12111110985279083...,ENFJ
6,6,6,10100259363675182.jpg,"[{'Name': 'Human', 'Confidence': 99.2900848388...",[{'BoundingBox': {'Width': 0.42888888716697693...,ENFJ


In [149]:
len(df)

12580

In [150]:
# separate the personality type to IvsE, SvsN, TvsF, JvsP
IvsE_list = []
SvsN_list = []
TvsF_list = []
JvsP_list = []

for i in df['Personality']:
    if i == 'ENFJ':
        IvsE_list.append('E')
        SvsN_list.append('N')
        TvsF_list.append('Feel')
        JvsP_list.append('J')
    elif i == 'ENFP':
        IvsE_list.append('E')
        SvsN_list.append('N')
        TvsF_list.append('Feel')
        JvsP_list.append('P')
    elif i == 'ENTJ':
        IvsE_list.append('E')
        SvsN_list.append('N')
        TvsF_list.append('Think')
        JvsP_list.append('J')
    elif i == 'ENTP':
        IvsE_list.append('E')
        SvsN_list.append('N')
        TvsF_list.append('Think')
        JvsP_list.append('P')
    elif i == 'ESFJ':
        IvsE_list.append('E')
        SvsN_list.append('S')
        TvsF_list.append('Feel')
        JvsP_list.append('J')
    elif i == 'ESFP':
        IvsE_list.append('E')
        SvsN_list.append('S')
        TvsF_list.append('Feel')
        JvsP_list.append('P')
    elif i == 'ESTJ':
        IvsE_list.append('E')
        SvsN_list.append('S')
        TvsF_list.append('Think')
        JvsP_list.append('J')
    elif i == 'ESTP':
        IvsE_list.append('E')
        SvsN_list.append('S')
        TvsF_list.append('Think')
        JvsP_list.append('P')
    elif i == 'INFJ':
        IvsE_list.append('I')
        SvsN_list.append('N')
        TvsF_list.append('Feel')
        JvsP_list.append('J')
    elif i == 'INFP':
        IvsE_list.append('I')
        SvsN_list.append('N')
        TvsF_list.append('Feel')
        JvsP_list.append('P')
    elif i == 'INTJ':
        IvsE_list.append('I')
        SvsN_list.append('N')
        TvsF_list.append('Think')
        JvsP_list.append('J')
    elif i == 'INTP':
        IvsE_list.append('I')
        SvsN_list.append('N')
        TvsF_list.append('Think')
        JvsP_list.append('P')
    elif i == 'ISFJ':
        IvsE_list.append('I')
        SvsN_list.append('S')
        TvsF_list.append('Feel')
        JvsP_list.append('J')
    elif i == 'ISFP':
        IvsE_list.append('I')
        SvsN_list.append('S')
        TvsF_list.append('Feel')
        JvsP_list.append('P')
    elif i == 'ISTJ':
        IvsE_list.append('I')
        SvsN_list.append('S')
        TvsF_list.append('Think')
        JvsP_list.append('J')
    elif i == 'ISTP':
        IvsE_list.append('I')
        SvsN_list.append('S')
        TvsF_list.append('Think')
        JvsP_list.append('P')
    else:
        print('None of the 16 types.')

In [151]:
df['IvsE'] = IvsE_list
df['SvsN'] = SvsN_list
df['TvsF'] = TvsF_list
df['JvsP'] = JvsP_list

df.head()

Unnamed: 0,index,GroupIndex,PhotoName,Labels,FaceDetails,Personality,IvsE,SvsN,TvsF,JvsP
0,0,0,1000006876806379.jpg,"[{'Name': 'Human', 'Confidence': 99.2957839965...",[{'BoundingBox': {'Width': 0.11999999731779099...,ENFJ,E,N,Feel,J
2,2,2,1001328126675771.jpg,"[{'Name': 'People', 'Confidence': 99.012985229...","[{'BoundingBox': {'Width': 0.4146634638309479,...",ENFJ,E,N,Feel,J
4,4,4,10100238407492499.jpg,"[{'Name': 'Human', 'Confidence': 99.3081130981...","[{'BoundingBox': {'Width': 0.6088888645172119,...",ENFJ,E,N,Feel,J
5,5,5,10100251167240940.jpg,"[{'Name': 'People', 'Confidence': 99.175323486...",[{'BoundingBox': {'Width': 0.12111110985279083...,ENFJ,E,N,Feel,J
6,6,6,10100259363675182.jpg,"[{'Name': 'Human', 'Confidence': 99.2900848388...",[{'BoundingBox': {'Width': 0.42888888716697693...,ENFJ,E,N,Feel,J


## Face Detail Value

In [152]:
df['FaceDetails'][2]

"[{'BoundingBox': {'Width': 0.4146634638309479, 'Height': 0.4146634638309479, 'Left': 0.05288461595773697, 'Top': 0.25961539149284363}, 'AgeRange': {'Low': 19, 'High': 36}, 'Smile': {'Value': True, 'Confidence': 62.861515045166016}, 'Eyeglasses': {'Value': False, 'Confidence': 99.99372100830078}, 'Sunglasses': {'Value': False, 'Confidence': 99.97441101074219}, 'Gender': {'Value': 'Female', 'Confidence': 100.0}, 'Beard': {'Value': True, 'Confidence': 52.13731384277344}, 'Mustache': {'Value': False, 'Confidence': 81.03985595703125}, 'EyesOpen': {'Value': True, 'Confidence': 99.98633575439453}, 'MouthOpen': {'Value': False, 'Confidence': 99.98002624511719}, 'Emotions': [{'Type': 'HAPPY', 'Confidence': 86.62401580810547}, {'Type': 'CONFUSED', 'Confidence': 1.4656126499176025}, {'Type': 'CALM', 'Confidence': 1.3178050518035889}], 'Landmarks': [{'Type': 'eyeLeft', 'X': 0.1717977076768875, 'Y': 0.43629008531570435}, {'Type': 'eyeRight', 'X': 0.3249512314796448, 'Y': 0.4205191731452942}, {'Typ

In [153]:
teststring = df['FaceDetails'][2]

In [154]:
import ast

In [155]:
dictstring = teststring[1:-1]
testdict = ast.literal_eval(dictstring)

In [156]:
testdict

{'AgeRange': {'High': 36, 'Low': 19},
 'Beard': {'Confidence': 52.13731384277344, 'Value': True},
 'BoundingBox': {'Height': 0.4146634638309479,
  'Left': 0.05288461595773697,
  'Top': 0.25961539149284363,
  'Width': 0.4146634638309479},
 'Confidence': 99.99908447265625,
 'Emotions': [{'Confidence': 86.62401580810547, 'Type': 'HAPPY'},
  {'Confidence': 1.4656126499176025, 'Type': 'CONFUSED'},
  {'Confidence': 1.3178050518035889, 'Type': 'CALM'}],
 'Eyeglasses': {'Confidence': 99.99372100830078, 'Value': False},
 'EyesOpen': {'Confidence': 99.98633575439453, 'Value': True},
 'Gender': {'Confidence': 100.0, 'Value': 'Female'},
 'Landmarks': [{'Type': 'eyeLeft',
   'X': 0.1717977076768875,
   'Y': 0.43629008531570435},
  {'Type': 'eyeRight', 'X': 0.3249512314796448, 'Y': 0.4205191731452942},
  {'Type': 'nose', 'X': 0.24751560389995575, 'Y': 0.5103107690811157},
  {'Type': 'mouthLeft', 'X': 0.21099551022052765, 'Y': 0.574430525302887},
  {'Type': 'mouthRight', 'X': 0.3355630934238434, 'Y':

In [157]:
testdict['Quality']['Brightness']

38.64361572265625

In [158]:
testdict.keys()

dict_keys(['BoundingBox', 'AgeRange', 'Smile', 'Eyeglasses', 'Sunglasses', 'Gender', 'Beard', 'Mustache', 'EyesOpen', 'MouthOpen', 'Emotions', 'Landmarks', 'Pose', 'Quality', 'Confidence'])

In [159]:
len(testdict)

15

In [160]:
testdict['AgeRange']

{'High': 36, 'Low': 19}

In [161]:
testdict['Smile']

{'Confidence': 62.861515045166016, 'Value': True}

In [162]:
testdict['Eyeglasses']

{'Confidence': 99.99372100830078, 'Value': False}

In [163]:
testdict['Sunglasses']

{'Confidence': 99.97441101074219, 'Value': False}

In [164]:
testdict['Gender']

{'Confidence': 100.0, 'Value': 'Female'}

In [165]:
testdict['Beard']

{'Confidence': 52.13731384277344, 'Value': True}

In [166]:
testdict['Mustache']

{'Confidence': 81.03985595703125, 'Value': False}

In [167]:
testdict['EyesOpen']

{'Confidence': 99.98633575439453, 'Value': True}

In [168]:
testdict['MouthOpen']

{'Confidence': 99.98002624511719, 'Value': False}

In [169]:
testdict['Emotions']

[{'Confidence': 86.62401580810547, 'Type': 'HAPPY'},
 {'Confidence': 1.4656126499176025, 'Type': 'CONFUSED'},
 {'Confidence': 1.3178050518035889, 'Type': 'CALM'}]

In [170]:
testdict['Pose']

{'Pitch': -8.956658363342285,
 'Roll': -5.676806926727295,
 'Yaw': -9.280488014221191}

In [171]:
testdict['Quality']

{'Brightness': 38.64361572265625, 'Sharpness': 99.9980239868164}

In [172]:
testdict['Confidence']

99.99908447265625

In [173]:
QualityDF = pd.DataFrame(data=testdict['Quality'], index=[0])
QualityDF

Unnamed: 0,Brightness,Sharpness
0,38.643616,99.998024


### some photos have multiple people

In [174]:
# get all emotion types
for face_str in df['FaceDetails']:
    # delete [] of string
    face_str = face_str[1:-1]
    
    # convert to dict
    face_dict = ast.literal_eval(face_str)
    
    # see if every observation has 3 emotion types
    count = 0
    if len(face_dict['Emotions']) != 3:
        count += 1
    
    # get emotions
    EmotionSet = set()
    for i in face_dict['Emotions']:
        EmotionSet.add(i['Type'])

TypeError: tuple indices must be integers or slices, not str

In [175]:
# only three tyeps of emotions
EmotionSet

{'CALM', 'CONFUSED', 'HAPPY'}

In [176]:
# every observation has those three types of emotions
count

0

In [177]:
# get all pose types
for face_str in df['FaceDetails']:
    # delete [] of string
    face_str = face_str[1:-1]
    
    # convert to dict
    face_dict = ast.literal_eval(face_str)
    
    # see if every observation has 3 emotion types
    count = 0
    if len(face_dict['Pose']) != 3:
        count += 1
    
    # get emotions
    PoseSet = set()
    for i in list(face_dict['Pose'].keys()):
        PoseSet.add(i)

TypeError: tuple indices must be integers or slices, not str

In [178]:
# only those three types of poses
PoseSet

{'Pitch', 'Roll', 'Yaw'}

In [179]:
# every observation has three poses
count

0

In [180]:
# only keep photos of one person

drop_index = []
for i in range(len(df)):
    # check string length, <3000 is one person
    if len(df['FaceDetails'].iloc[i]) > 3000:
        drop_index.append(df.index[i])

In [181]:
df = df.drop(drop_index)

In [182]:
len(df)

9525

In [183]:
df.head()

Unnamed: 0,index,GroupIndex,PhotoName,Labels,FaceDetails,Personality,IvsE,SvsN,TvsF,JvsP
2,2,2,1001328126675771.jpg,"[{'Name': 'People', 'Confidence': 99.012985229...","[{'BoundingBox': {'Width': 0.4146634638309479,...",ENFJ,E,N,Feel,J
4,4,4,10100238407492499.jpg,"[{'Name': 'Human', 'Confidence': 99.3081130981...","[{'BoundingBox': {'Width': 0.6088888645172119,...",ENFJ,E,N,Feel,J
7,7,7,10100279131750930.jpg,"[{'Name': 'People', 'Confidence': 98.705482482...","[{'BoundingBox': {'Width': 0.6666666865348816,...",ENFJ,E,N,Feel,J
9,9,9,10100367881175131.jpg,"[{'Name': 'Human', 'Confidence': 99.3036804199...",[{'BoundingBox': {'Width': 0.39759817719459534...,ENFJ,E,N,Feel,J
10,10,10,10100371498945180.jpg,"[{'Name': 'Human', 'Confidence': 99.3256072998...","[{'BoundingBox': {'Width': 0.4933333396911621,...",ENFJ,E,N,Feel,J


In [184]:
testdict['Emotions']

[{'Confidence': 86.62401580810547, 'Type': 'HAPPY'},
 {'Confidence': 1.4656126499176025, 'Type': 'CONFUSED'},
 {'Confidence': 1.3178050518035889, 'Type': 'CALM'}]

In [185]:
EmotionsList = []
for i in testdict['Emotions']:
    EmotionsList.append(i['Type'])
EmotionsList

['HAPPY', 'CONFUSED', 'CALM']

In [186]:
'HAPPY' in EmotionsList

True

In [187]:
EmotionIndex = EmotionsList.index('HAPPY')
EmotionIndex

0

In [188]:
testdict['Emotions'][EmotionIndex]

{'Confidence': 86.62401580810547, 'Type': 'HAPPY'}

In [189]:
# get features

AgeList = []

BeardConfList = []
BeardValueList = []

HappyList = []
ConfusedList = []
CalmList = []
AngryList = []
SadList = []
SurprisedList = []
DisgustedList = []

EyeglassesConfList = []
EyeglassesValueList = []

EyesOpenConfList = []
EyesOpenValueList = []

GenderConfList = []
GenderValueList = []

MouthOpenConfList = []
MouthOpenValueList = []

MustacheConfList = []
MustacheValueList = []

PitchList = []
RollList = []
YawList = []

SmileConfList = []
SmileValueList = []

BrightnessList = []
SharpnessList = []

SunglassesConfList = []
SunglassesValueList = []

for face_str in df['FaceDetails']:
    # delete [] of string
    face_str = face_str[1:-1]
    
    # convert to dict
    face_dict = ast.literal_eval(face_str)
    
    AgeList.append((face_dict['AgeRange']['High'] + face_dict['AgeRange']['Low']) / 2)
    
    BeardConfList.append(face_dict['Beard']['Confidence'])
    BeardValueList.append(face_dict['Beard']['Value'])
    
    EmotionsList = []
    for i in face_dict['Emotions']:
        EmotionsList.append(i['Type'])
        
    if 'HAPPY' in EmotionsList:
        EmotionIndex = EmotionsList.index('HAPPY')
        HappyList.append(face_dict['Emotions'][EmotionIndex]['Confidence'])
    else:
        HappyList.append(0.0)
    
    if 'CONFUSED' in EmotionsList:
        EmotionIndex = EmotionsList.index('CONFUSED')
        ConfusedList.append(face_dict['Emotions'][EmotionIndex]['Confidence'])
    else:
        ConfusedList.append(0.0)
        
    if 'CALM' in EmotionsList:
        EmotionIndex = EmotionsList.index('CALM')
        CalmList.append(face_dict['Emotions'][EmotionIndex]['Confidence'])
    else:
        CalmList.append(0.0)
        
    if 'ANGRY' in EmotionsList:
        EmotionIndex = EmotionsList.index('ANGRY')
        AngryList.append(face_dict['Emotions'][EmotionIndex]['Confidence'])
    else:
        AngryList.append(0.0)
        
    if 'SAD' in EmotionsList:
        EmotionIndex = EmotionsList.index('SAD')
        SadList.append(face_dict['Emotions'][EmotionIndex]['Confidence'])
    else:
        SadList.append(0.0)
        
    if 'SURPRISED' in EmotionsList:
        EmotionIndex = EmotionsList.index('SURPRISED')
        SurprisedList.append(face_dict['Emotions'][EmotionIndex]['Confidence'])
    else:
        SurprisedList.append(0.0)
        
    if 'DISGUSTED' in EmotionsList:
        EmotionIndex = EmotionsList.index('DISGUSTED')
        DisgustedList.append(face_dict['Emotions'][EmotionIndex]['Confidence'])
    else:
        DisgustedList.append(0.0)
    
            
    EyeglassesConfList.append(face_dict['Eyeglasses']['Confidence'])
    EyeglassesValueList.append(face_dict['Eyeglasses']['Value'])
    
    EyesOpenConfList.append(face_dict['EyesOpen']['Confidence'])
    EyesOpenValueList.append(face_dict['EyesOpen']['Value'])
    
    GenderConfList.append(face_dict['Gender']['Confidence'])
    GenderValueList.append(face_dict['Gender']['Value'])
    
    MouthOpenConfList.append(face_dict['MouthOpen']['Confidence'])
    MouthOpenValueList.append(face_dict['MouthOpen']['Value'])
    
    MustacheConfList.append(face_dict['Mustache']['Confidence'])
    MustacheValueList.append(face_dict['Mustache']['Value'])
    
    PitchList.append(face_dict['Pose']['Pitch'])
    RollList.append(face_dict['Pose']['Roll'])
    YawList.append(face_dict['Pose']['Yaw'])
    
    SmileConfList.append(face_dict['Smile']['Confidence'])
    SmileValueList.append(face_dict['Smile']['Value'])
    
    BrightnessList.append(face_dict['Quality']['Brightness'])
    SharpnessList.append(face_dict['Quality']['Sharpness'])
    
    SunglassesConfList.append(face_dict['Sunglasses']['Confidence'])
    SunglassesValueList.append(face_dict['Sunglasses']['Value'])
    

In [190]:
len(df)

9525

In [191]:
print(len(AgeList), len(BeardConfList), len(BeardValueList), len(EyeglassesConfList),
      len(EyeglassesValueList), len(EyesOpenConfList), len(EyesOpenValueList), len(GenderConfList),
      len(GenderValueList), len(MouthOpenConfList), len(MouthOpenValueList), len(MustacheConfList),
      len(MustacheValueList), len(PitchList), len(RollList), len(YawList), len(SmileConfList), len(SmileValueList),
      len(BrightnessList), len(SharpnessList), len(SunglassesConfList), len(SunglassesValueList))

9525 9525 9525 9525 9525 9525 9525 9525 9525 9525 9525 9525 9525 9525 9525 9525 9525 9525 9525 9525 9525 9525


In [192]:
print(len(HappyList), len(ConfusedList), len(CalmList), len(SadList), len(AngryList), len(SurprisedList), len(DisgustedList))

9525 9525 9525 9525 9525 9525 9525


In [193]:
# add feature columns to dataframe

df['Age'] = AgeList

df['BeardConf'] = BeardConfList
df['BeardValue'] = BeardValueList

df['Happy'] = HappyList
df['Confused'] = ConfusedList
df['Calm'] = CalmList
df['Angry'] = AngryList
df['Sad'] = SadList
df['Surprised'] = SurprisedList
df['Disgusted'] = DisgustedList

df['EyeglassesConf'] = EyeglassesConfList
df['EyeglassesValue'] = EyeglassesValueList

df['EyesOpenConf'] = EyesOpenConfList
df['EyesOpenValue'] = EyesOpenValueList

df['GenderConf'] = GenderConfList
df['GenderValue'] = GenderValueList

df['MouthOpenConf'] = MouthOpenConfList
df['MouthOpenValue'] = MouthOpenValueList

df['MustacheConf'] = MustacheConfList
df['MustacheValue'] = MustacheValueList

df['Pitch'] = PitchList
df['Roll'] = RollList
df['Yaw'] = YawList

df['SmileConf'] = SmileConfList
df['SmileValue'] = SmileValueList

df['Brightness'] = BrightnessList
df['Sharpness'] = SharpnessList

df['SunglassesConf'] = SunglassesConfList
df['SunglassesValue'] = SunglassesValueList


In [194]:
df.head()

Unnamed: 0,index,GroupIndex,PhotoName,Labels,FaceDetails,Personality,IvsE,SvsN,TvsF,JvsP,...,MustacheValue,Pitch,Roll,Yaw,SmileConf,SmileValue,Brightness,Sharpness,SunglassesConf,SunglassesValue
2,2,2,1001328126675771.jpg,"[{'Name': 'People', 'Confidence': 99.012985229...","[{'BoundingBox': {'Width': 0.4146634638309479,...",ENFJ,E,N,Feel,J,...,False,-8.956658,-5.676807,-9.280488,62.861515,True,38.643616,99.998024,99.974411,False
4,4,4,10100238407492499.jpg,"[{'Name': 'Human', 'Confidence': 99.3081130981...","[{'BoundingBox': {'Width': 0.6088888645172119,...",ENFJ,E,N,Feel,J,...,False,-14.06557,33.765709,-30.307148,99.733307,True,42.755604,99.984879,98.872681,False
7,7,7,10100279131750930.jpg,"[{'Name': 'People', 'Confidence': 98.705482482...","[{'BoundingBox': {'Width': 0.6666666865348816,...",ENFJ,E,N,Feel,J,...,True,-1.285544,6.345731,12.106484,92.005432,True,42.692741,99.998024,96.547523,True
9,9,9,10100367881175131.jpg,"[{'Name': 'Human', 'Confidence': 99.3036804199...",[{'BoundingBox': {'Width': 0.39759817719459534...,ENFJ,E,N,Feel,J,...,False,-8.18356,-10.917071,8.972569,95.709465,True,44.389008,99.99453,99.947449,False
10,10,10,10100371498945180.jpg,"[{'Name': 'Human', 'Confidence': 99.3256072998...","[{'BoundingBox': {'Width': 0.4933333396911621,...",ENFJ,E,N,Feel,J,...,False,-17.491995,22.59289,-33.872795,94.382462,True,45.048271,99.974861,99.316338,False


In [195]:
df = df.drop(['GroupIndex','Labels','FaceDetails'], axis='columns')
df.head()

Unnamed: 0,index,PhotoName,Personality,IvsE,SvsN,TvsF,JvsP,Age,BeardConf,BeardValue,...,MustacheValue,Pitch,Roll,Yaw,SmileConf,SmileValue,Brightness,Sharpness,SunglassesConf,SunglassesValue
2,2,1001328126675771.jpg,ENFJ,E,N,Feel,J,27.5,52.137314,True,...,False,-8.956658,-5.676807,-9.280488,62.861515,True,38.643616,99.998024,99.974411,False
4,4,10100238407492499.jpg,ENFJ,E,N,Feel,J,34.5,99.997917,False,...,False,-14.06557,33.765709,-30.307148,99.733307,True,42.755604,99.984879,98.872681,False
7,7,10100279131750930.jpg,ENFJ,E,N,Feel,J,29.0,95.081314,True,...,True,-1.285544,6.345731,12.106484,92.005432,True,42.692741,99.998024,96.547523,True
9,9,10100367881175131.jpg,ENFJ,E,N,Feel,J,34.5,99.998543,False,...,False,-8.18356,-10.917071,8.972569,95.709465,True,44.389008,99.99453,99.947449,False
10,10,10100371498945180.jpg,ENFJ,E,N,Feel,J,43.5,84.941391,False,...,False,-17.491995,22.59289,-33.872795,94.382462,True,45.048271,99.974861,99.316338,False


In [196]:
df.to_csv('CleanData.csv')