In [168]:
import numpy as np
import pandas as pd
import glob
import json

## Data Pre-processing

In [169]:
# read in csv file generated by Amazon API
path = r'C:\Users\movin\Documents\GitHub\PersonalityFromPhoto\data'
allFiles = glob.glob(path + '/*.csv')
df = pd.DataFrame()
list_ = []
for file_ in allFiles:
    df = pd.read_csv(file_, index_col=None, header=0)
    list_.append(df)
df = pd.concat(list_, ignore_index=True)

In [170]:
len(df.index)

2000

In [171]:
df.head()

Unnamed: 0.1,Unnamed: 0,fileName,Labels,FaceDetails
0,0,Facebook/ENTJ/1001091583356351.jpg,"[{'Name': 'Art', 'Confidence': 52.856140136718...",[]
1,1,Facebook/ENTJ/10100370325557215.jpg,"[{'Name': 'People', 'Confidence': 98.935142517...",[{'BoundingBox': {'Width': 0.46620047092437744...
2,2,Facebook/ENTJ/10100494871630368.jpg,"[{'Name': 'Animal', 'Confidence': 81.822982788...",[]
3,3,Facebook/ENTJ/10100524843917367.jpg,"[{'Name': 'Human', 'Confidence': 99.3036117553...",[{'BoundingBox': {'Width': 0.43888887763023376...
4,4,Facebook/ENTJ/10100593000838573.jpg,"[{'Name': 'Human', 'Confidence': 99.3088073730...","[{'BoundingBox': {'Width': 0.2988888919353485,..."


In [172]:
# rename the unnamed column
df = df.rename(columns={'Unnamed: 0': 'GroupIndex'})

In [173]:
# add an index column
df['index'] = np.arange(len(df.index))

In [174]:
cols = df.columns.tolist()
cols

['GroupIndex', 'fileName', 'Labels', 'FaceDetails', 'index']

In [175]:
cols = cols[-1:] + cols[:-1]
cols

['index', 'GroupIndex', 'fileName', 'Labels', 'FaceDetails']

In [176]:
df = df[cols]

In [177]:
df.head()

Unnamed: 0,index,GroupIndex,fileName,Labels,FaceDetails
0,0,0,Facebook/ENTJ/1001091583356351.jpg,"[{'Name': 'Art', 'Confidence': 52.856140136718...",[]
1,1,1,Facebook/ENTJ/10100370325557215.jpg,"[{'Name': 'People', 'Confidence': 98.935142517...",[{'BoundingBox': {'Width': 0.46620047092437744...
2,2,2,Facebook/ENTJ/10100494871630368.jpg,"[{'Name': 'Animal', 'Confidence': 81.822982788...",[]
3,3,3,Facebook/ENTJ/10100524843917367.jpg,"[{'Name': 'Human', 'Confidence': 99.3036117553...",[{'BoundingBox': {'Width': 0.43888887763023376...
4,4,4,Facebook/ENTJ/10100593000838573.jpg,"[{'Name': 'Human', 'Confidence': 99.3088073730...","[{'BoundingBox': {'Width': 0.2988888919353485,..."


In [178]:
# get the personality for every photo
personality_list = []
for i in range(len(df.index)):
    personality_list.append(df['fileName'][i][9:13])
len(personality_list)

2000

In [179]:
personality_list[:10]

['ENTJ',
 'ENTJ',
 'ENTJ',
 'ENTJ',
 'ENTJ',
 'ENTJ',
 'ENTJ',
 'ENTJ',
 'ENTJ',
 'ENTJ']

In [180]:
personality_list[-10:]

['ISFP',
 'ISFP',
 'ISFP',
 'ISFP',
 'ISFP',
 'ISFP',
 'ISFP',
 'ISFP',
 'ISFP',
 'ISFP']

In [181]:
# add the personality column to dataframe
se = pd.Series(personality_list)
df['Personality'] = se.values
df.head()

Unnamed: 0,index,GroupIndex,fileName,Labels,FaceDetails,Personality
0,0,0,Facebook/ENTJ/1001091583356351.jpg,"[{'Name': 'Art', 'Confidence': 52.856140136718...",[],ENTJ
1,1,1,Facebook/ENTJ/10100370325557215.jpg,"[{'Name': 'People', 'Confidence': 98.935142517...",[{'BoundingBox': {'Width': 0.46620047092437744...,ENTJ
2,2,2,Facebook/ENTJ/10100494871630368.jpg,"[{'Name': 'Animal', 'Confidence': 81.822982788...",[],ENTJ
3,3,3,Facebook/ENTJ/10100524843917367.jpg,"[{'Name': 'Human', 'Confidence': 99.3036117553...",[{'BoundingBox': {'Width': 0.43888887763023376...,ENTJ
4,4,4,Facebook/ENTJ/10100593000838573.jpg,"[{'Name': 'Human', 'Confidence': 99.3088073730...","[{'BoundingBox': {'Width': 0.2988888919353485,...",ENTJ


In [182]:
df.tail()

Unnamed: 0,index,GroupIndex,fileName,Labels,FaceDetails,Personality
1995,1995,995,Facebook/ISFP/1481631661934876.jpg,"[{'Name': 'Human', 'Confidence': 99.2935333251...","[{'BoundingBox': {'Width': 0.5666666626930237,...",ISFP
1996,1996,996,Facebook/ISFP/1481704585211782.jpg,"[{'Name': 'People', 'Confidence': 99.195518493...","[{'BoundingBox': {'Width': 0.273333340883255, ...",ISFP
1997,1997,997,Facebook/ISFP/1481935611898761.jpg,"[{'Name': 'Human', 'Confidence': 99.3003311157...","[{'BoundingBox': {'Width': 0.7799999713897705,...",ISFP
1998,1998,998,Facebook/ISFP/1483330225037384.jpg,"[{'Name': 'Human', 'Confidence': 99.2822799682...",[],ISFP
1999,1999,999,Facebook/ISFP/1483976051687978.jpg,"[{'Name': 'Human', 'Confidence': 99.3224563598...","[{'BoundingBox': {'Width': 0.5011110901832581,...",ISFP


In [183]:
# empty face details number
count = 0
for i in df['FaceDetails']:
    if len(i)==2:
        count += 1

In [184]:
count

368

In [185]:
# replace empty face detail with NaN
df = df.replace('[]', np.nan)

In [186]:
df.tail()

Unnamed: 0,index,GroupIndex,fileName,Labels,FaceDetails,Personality
1995,1995,995,Facebook/ISFP/1481631661934876.jpg,"[{'Name': 'Human', 'Confidence': 99.2935333251...","[{'BoundingBox': {'Width': 0.5666666626930237,...",ISFP
1996,1996,996,Facebook/ISFP/1481704585211782.jpg,"[{'Name': 'People', 'Confidence': 99.195518493...","[{'BoundingBox': {'Width': 0.273333340883255, ...",ISFP
1997,1997,997,Facebook/ISFP/1481935611898761.jpg,"[{'Name': 'Human', 'Confidence': 99.3003311157...","[{'BoundingBox': {'Width': 0.7799999713897705,...",ISFP
1998,1998,998,Facebook/ISFP/1483330225037384.jpg,"[{'Name': 'Human', 'Confidence': 99.2822799682...",,ISFP
1999,1999,999,Facebook/ISFP/1483976051687978.jpg,"[{'Name': 'Human', 'Confidence': 99.3224563598...","[{'BoundingBox': {'Width': 0.5011110901832581,...",ISFP


In [187]:
# drop empty value
df = df.dropna()

In [188]:
df.tail()

Unnamed: 0,index,GroupIndex,fileName,Labels,FaceDetails,Personality
1994,1994,994,Facebook/ISFP/1481480401887843.jpg,"[{'Name': 'People', 'Confidence': 99.172782897...","[{'BoundingBox': {'Width': 0.5699999928474426,...",ISFP
1995,1995,995,Facebook/ISFP/1481631661934876.jpg,"[{'Name': 'Human', 'Confidence': 99.2935333251...","[{'BoundingBox': {'Width': 0.5666666626930237,...",ISFP
1996,1996,996,Facebook/ISFP/1481704585211782.jpg,"[{'Name': 'People', 'Confidence': 99.195518493...","[{'BoundingBox': {'Width': 0.273333340883255, ...",ISFP
1997,1997,997,Facebook/ISFP/1481935611898761.jpg,"[{'Name': 'Human', 'Confidence': 99.3003311157...","[{'BoundingBox': {'Width': 0.7799999713897705,...",ISFP
1999,1999,999,Facebook/ISFP/1483976051687978.jpg,"[{'Name': 'Human', 'Confidence': 99.3224563598...","[{'BoundingBox': {'Width': 0.5011110901832581,...",ISFP


In [189]:
len(df)

1632

## Face Detail Value

In [190]:
df['FaceDetails'][1999]

"[{'BoundingBox': {'Width': 0.5011110901832581, 'Height': 0.5011110901832581, 'Left': 0.2566666603088379, 'Top': 0.11222222447395325}, 'AgeRange': {'Low': 20, 'High': 38}, 'Smile': {'Value': True, 'Confidence': 95.42520141601562}, 'Eyeglasses': {'Value': False, 'Confidence': 99.97572326660156}, 'Sunglasses': {'Value': False, 'Confidence': 99.11851501464844}, 'Gender': {'Value': 'Female', 'Confidence': 100.0}, 'Beard': {'Value': False, 'Confidence': 99.9770278930664}, 'Mustache': {'Value': False, 'Confidence': 99.98922729492188}, 'EyesOpen': {'Value': True, 'Confidence': 99.99208068847656}, 'MouthOpen': {'Value': True, 'Confidence': 52.927005767822266}, 'Emotions': [{'Type': 'HAPPY', 'Confidence': 95.28817749023438}, {'Type': 'SURPRISED', 'Confidence': 5.70964241027832}, {'Type': 'SAD', 'Confidence': 1.2445271015167236}], 'Landmarks': [{'Type': 'eyeLeft', 'X': 0.41449981927871704, 'Y': 0.30737680196762085}, {'Type': 'eyeRight', 'X': 0.602200448513031, 'Y': 0.29441115260124207}, {'Type':

In [191]:
teststring = df['FaceDetails'][1999]

In [192]:
import ast

In [193]:
dictstring = teststring[1:-1]
testdict = ast.literal_eval(dictstring)

In [194]:
testdict

{'AgeRange': {'High': 38, 'Low': 20},
 'Beard': {'Confidence': 99.9770278930664, 'Value': False},
 'BoundingBox': {'Height': 0.5011110901832581,
  'Left': 0.2566666603088379,
  'Top': 0.11222222447395325,
  'Width': 0.5011110901832581},
 'Confidence': 99.9944839477539,
 'Emotions': [{'Confidence': 95.28817749023438, 'Type': 'HAPPY'},
  {'Confidence': 5.70964241027832, 'Type': 'SURPRISED'},
  {'Confidence': 1.2445271015167236, 'Type': 'SAD'}],
 'Eyeglasses': {'Confidence': 99.97572326660156, 'Value': False},
 'EyesOpen': {'Confidence': 99.99208068847656, 'Value': True},
 'Gender': {'Confidence': 100.0, 'Value': 'Female'},
 'Landmarks': [{'Type': 'eyeLeft',
   'X': 0.41449981927871704,
   'Y': 0.30737680196762085},
  {'Type': 'eyeRight', 'X': 0.602200448513031, 'Y': 0.29441115260124207},
  {'Type': 'nose', 'X': 0.5498752593994141, 'Y': 0.36129671335220337},
  {'Type': 'mouthLeft', 'X': 0.4496658146381378, 'Y': 0.47909122705459595},
  {'Type': 'mouthRight', 'X': 0.6069892048835754, 'Y': 0

In [195]:
testdict['Quality']['Brightness']

65.11871337890625

In [196]:
testdict.keys()

dict_keys(['BoundingBox', 'AgeRange', 'Smile', 'Eyeglasses', 'Sunglasses', 'Gender', 'Beard', 'Mustache', 'EyesOpen', 'MouthOpen', 'Emotions', 'Landmarks', 'Pose', 'Quality', 'Confidence'])

In [197]:
len(testdict)

15

In [198]:
testdict['AgeRange']

{'High': 38, 'Low': 20}

In [199]:
testdict['Smile']

{'Confidence': 95.42520141601562, 'Value': True}

In [200]:
testdict['Eyeglasses']

{'Confidence': 99.97572326660156, 'Value': False}

In [201]:
testdict['Sunglasses']

{'Confidence': 99.11851501464844, 'Value': False}

In [202]:
testdict['Gender']

{'Confidence': 100.0, 'Value': 'Female'}

In [203]:
testdict['Beard']

{'Confidence': 99.9770278930664, 'Value': False}

In [204]:
testdict['Mustache']

{'Confidence': 99.98922729492188, 'Value': False}

In [205]:
testdict['EyesOpen']

{'Confidence': 99.99208068847656, 'Value': True}

In [206]:
testdict['MouthOpen']

{'Confidence': 52.927005767822266, 'Value': True}

In [207]:
testdict['Emotions']

[{'Confidence': 95.28817749023438, 'Type': 'HAPPY'},
 {'Confidence': 5.70964241027832, 'Type': 'SURPRISED'},
 {'Confidence': 1.2445271015167236, 'Type': 'SAD'}]

In [208]:
testdict['Pose']

{'Pitch': 9.185986518859863,
 'Roll': -5.183773517608643,
 'Yaw': 16.507326126098633}

In [209]:
testdict['Quality']

{'Brightness': 65.11871337890625, 'Sharpness': 99.9945297241211}

In [210]:
testdict['Confidence']

99.9944839477539

In [211]:
QualityDF = pd.DataFrame(data=testdict['Quality'], index=[0])
QualityDF

Unnamed: 0,Brightness,Sharpness
0,65.118713,99.99453


### some photos have multiple people

In [123]:
# ignore this for now
# function to get emotion type from dict
def get_emotion(face_str):
    """input face detail string, output list of emotion types"""
    # delete [] of string
    face_str = face_str[1:-1]
    
    # convert to python object
    face_dict = ast.literal_eval(face_str)
    
    # get emotions
    EmotionList = []
    for i in face_dict['Emotions']:
        EmotionList.append(i['Type'])
        
    return EmotionList

In [212]:
# len(face_dict) is the number of people in the photo
# 15 means one person in the photo
# 2 means two people in the photo
for face_str in df['FaceDetails']:
    # delete [] of string
    face_str = face_str[1:-1]
    
    # convert to python object
    face_dict = ast.literal_eval(face_str)
    
    print(len(face_dict))

2
15
15
15
15
15
2
15
15
15
15
2
15
15
15
15
15
2
15
15
15
2
15
5
15
3
15
2
15
15
15
15
2
15
15
15
15
15
15
2
15
2
15
2
3
15
15
3
15
15
2
15
15
15
15
15
9
15
15
15
15
2
15
15
15
15
15
2
15
15
15
15
15
4
2
2
2
15
15
15
2
15
5
15
3
15
2
15
6
2
3
15
15
15
15
15
2
15
2
15
15
15
15
15
15
2
15
2
15
2
15
2
15
2
15
2
2
3
2
15
2
15
15
15
15
15
2
2
15
15
15
15
15
2
15
15
15
2
3
15
15
2
15
2
15
15
15
15
15
15
15
15
15
15
2
3
3
2
2
15
15
2
15
2
2
3
15
15
15
15
15
15
15
15
15
15
15
2
3
5
15
15
15
15
15
15
15
15
2
2
15
15
2
15
15
3
15
5
15
15
15
15
15
15
2
15
15
15
15
15
15
15
15
15
15
15
15
15
15
15
15
15
15
2
15
15
2
15
3
15
15
15
15
2
15
15
15
15
15
15
15
6
15
15
15
15
15
15
2
15
3
15
15
15
15
2
15
15
15
15
15
15
2
2
15
5
15
15
2
2
15
15
15
15
2
15
15
3
15
15
2
15
15
15
15
15
2
2
15
15
15
15
15
15
2
4
15
2
15
2
15
15
15
2
2
15
15
2
15
15
15
15
2
15
3
15
15
15
15
3
2
15
15
15
2
15
15
15
2
15
2
15
15
2
15
2
2
15
15
15
15
15
15
15
15
15
2
15
9
2
15
15
15
15
15
15
15
15
15
15
2
15
2
2
15
5
15
15
2
15

SyntaxError: EOL while scanning string literal (<unknown>, line 1)

In [213]:
for face_str in df['FaceDetails']:
    print(len(face_str))

5918
2912
2933
2950
2934
2952
5883
2954
2942
2925
2935
5900
2956
2923
2945
2934
2942
5893
2949
2936
2939
5889
2948
14706
2940
8857
2941
5872
2934
2932
2968
2944
5887
2923
2954
2932
2943
2947
2926
5869
2938
5862
2968
5891
8818
2925
2945
8838
2944
2946
5876
2943
2913
2947
2950
2929
26553
2956
2949
2954
2928
5887
2937
2962
2936
2935
2946
5880
2938
2939
2955
2939
2927
11734
5883
5878
5889
2929
2944
2956
5879
2939
14735
2947
8850
2945
5901
2938
17627
5890
8833
2922
2957
2940
2951
2944
5847
2926
5866
2953
2941
2935
2935
2941
2926
5898
2941
5890
2973
5880
2916
5890
2936
5851
2935
5878
5889
8871
5910
2935
5880
2941
2928
2929
2942
2941
5886
5880
2937
2933
2955
2938
2954
5878
2934
2939
2937
5862
8865
2951
2944
5888
2935
5854
2935
2930
2954
2928
2950
2946
2935
2942
2931
2934
5882
8824
8815
5872
5885
2946
2968
5873
2926
5888
5879
8846
2919
2957
2958
2955
2940
2944
2942
2947
2938
2928
2938
5908
8820
14705
2957
2945
2908
2947
2926
2947
2936
2936
5860
5897
2936
2929
5892
2956
2945
8839
2927
14750
293

In [149]:
face_str = df['FaceDetails'].iloc[11]
face_str

"[{'BoundingBox': {'Width': 0.22355769574642181, 'Height': 0.22355769574642181, 'Left': 0.40264421701431274, 'Top': 0.3413461446762085}, 'AgeRange': {'Low': 26, 'High': 43}, 'Smile': {'Value': True, 'Confidence': 98.23444366455078}, 'Eyeglasses': {'Value': False, 'Confidence': 99.99933624267578}, 'Sunglasses': {'Value': False, 'Confidence': 99.74183654785156}, 'Gender': {'Value': 'Female', 'Confidence': 100.0}, 'Beard': {'Value': False, 'Confidence': 99.94194793701172}, 'Mustache': {'Value': False, 'Confidence': 99.84404754638672}, 'EyesOpen': {'Value': True, 'Confidence': 99.9548110961914}, 'MouthOpen': {'Value': False, 'Confidence': 99.8336410522461}, 'Emotions': [{'Type': 'HAPPY', 'Confidence': 99.32299041748047}, {'Type': 'CONFUSED', 'Confidence': 1.860809326171875}, {'Type': 'SAD', 'Confidence': 0.6087124347686768}], 'Landmarks': [{'Type': 'eyeLeft', 'X': 0.48109936714172363, 'Y': 0.4396161437034607}, {'Type': 'eyeRight', 'X': 0.5576461553573608, 'Y': 0.43761444091796875}, {'Type'

In [150]:
face_str = face_str[1:-1]
face_dict = ast.literal_eval(face_str)
face_dict

({'AgeRange': {'High': 43, 'Low': 26},
  'Beard': {'Confidence': 99.94194793701172, 'Value': False},
  'BoundingBox': {'Height': 0.22355769574642181,
   'Left': 0.40264421701431274,
   'Top': 0.3413461446762085,
   'Width': 0.22355769574642181},
  'Confidence': 99.95332336425781,
  'Emotions': [{'Confidence': 99.32299041748047, 'Type': 'HAPPY'},
   {'Confidence': 1.860809326171875, 'Type': 'CONFUSED'},
   {'Confidence': 0.6087124347686768, 'Type': 'SAD'}],
  'Eyeglasses': {'Confidence': 99.99933624267578, 'Value': False},
  'EyesOpen': {'Confidence': 99.9548110961914, 'Value': True},
  'Gender': {'Confidence': 100.0, 'Value': 'Female'},
  'Landmarks': [{'Type': 'eyeLeft',
    'X': 0.48109936714172363,
    'Y': 0.4396161437034607},
   {'Type': 'eyeRight', 'X': 0.5576461553573608, 'Y': 0.43761444091796875},
   {'Type': 'nose', 'X': 0.5411320924758911, 'Y': 0.4765285551548004},
   {'Type': 'mouthLeft', 'X': 0.48678645491600037, 'Y': 0.5083348155021667},
   {'Type': 'mouthRight', 'X': 0.55

In [151]:
len(face_dict)

4

In [152]:
face_dict[0]

{'AgeRange': {'High': 43, 'Low': 26},
 'Beard': {'Confidence': 99.94194793701172, 'Value': False},
 'BoundingBox': {'Height': 0.22355769574642181,
  'Left': 0.40264421701431274,
  'Top': 0.3413461446762085,
  'Width': 0.22355769574642181},
 'Confidence': 99.95332336425781,
 'Emotions': [{'Confidence': 99.32299041748047, 'Type': 'HAPPY'},
  {'Confidence': 1.860809326171875, 'Type': 'CONFUSED'},
  {'Confidence': 0.6087124347686768, 'Type': 'SAD'}],
 'Eyeglasses': {'Confidence': 99.99933624267578, 'Value': False},
 'EyesOpen': {'Confidence': 99.9548110961914, 'Value': True},
 'Gender': {'Confidence': 100.0, 'Value': 'Female'},
 'Landmarks': [{'Type': 'eyeLeft',
   'X': 0.48109936714172363,
   'Y': 0.4396161437034607},
  {'Type': 'eyeRight', 'X': 0.5576461553573608, 'Y': 0.43761444091796875},
  {'Type': 'nose', 'X': 0.5411320924758911, 'Y': 0.4765285551548004},
  {'Type': 'mouthLeft', 'X': 0.48678645491600037, 'Y': 0.5083348155021667},
  {'Type': 'mouthRight', 'X': 0.5509493350982666, 'Y':

In [153]:
face_dict[1]

{'AgeRange': {'High': 2, 'Low': 0},
 'Beard': {'Confidence': 99.86708068847656, 'Value': False},
 'BoundingBox': {'Height': 0.18149039149284363,
  'Left': 0.629807710647583,
  'Top': 0.34014421701431274,
  'Width': 0.18149039149284363},
 'Confidence': 99.63973999023438,
 'Emotions': [{'Confidence': 86.6963119506836, 'Type': 'HAPPY'},
  {'Confidence': 5.673420429229736, 'Type': 'SURPRISED'},
  {'Confidence': 2.77254581451416, 'Type': 'ANGRY'}],
 'Eyeglasses': {'Confidence': 99.98480224609375, 'Value': False},
 'EyesOpen': {'Confidence': 99.99874114990234, 'Value': True},
 'Gender': {'Confidence': 91.08841705322266, 'Value': 'Male'},
 'Landmarks': [{'Type': 'eyeLeft',
   'X': 0.6781446933746338,
   'Y': 0.43020063638687134},
  {'Type': 'eyeRight', 'X': 0.7412738800048828, 'Y': 0.40906137228012085},
  {'Type': 'nose', 'X': 0.7094468474388123, 'Y': 0.4493544101715088},
  {'Type': 'mouthLeft', 'X': 0.7043896913528442, 'Y': 0.48282521963119507},
  {'Type': 'mouthRight', 'X': 0.75383627414703

In [154]:
face_dict[2]

{'AgeRange': {'High': 5, 'Low': 1},
 'Beard': {'Confidence': 99.97213745117188, 'Value': False},
 'BoundingBox': {'Height': 0.1742788404226303,
  'Left': 0.14423076808452606,
  'Top': 0.31370192766189575,
  'Width': 0.1742788404226303},
 'Confidence': 99.7412109375,
 'Emotions': [{'Confidence': 99.87226104736328, 'Type': 'HAPPY'},
  {'Confidence': 3.6798834800720215, 'Type': 'CONFUSED'},
  {'Confidence': 0.3291487693786621, 'Type': 'CALM'}],
 'Eyeglasses': {'Confidence': 99.99671173095703, 'Value': False},
 'EyesOpen': {'Confidence': 99.98033905029297, 'Value': True},
 'Gender': {'Confidence': 100.0, 'Value': 'Female'},
 'Landmarks': [{'Type': 'eyeLeft',
   'X': 0.19454257190227509,
   'Y': 0.39647889137268066},
  {'Type': 'eyeRight', 'X': 0.2611493468284607, 'Y': 0.38403192162513733},
  {'Type': 'nose', 'X': 0.2364542931318283, 'Y': 0.4200401306152344},
  {'Type': 'mouthLeft', 'X': 0.20674018561840057, 'Y': 0.4474387764930725},
  {'Type': 'mouthRight', 'X': 0.266448438167572, 'Y': 0.4

In [155]:
face_dict[3]

{'AgeRange': {'High': 15, 'Low': 10},
 'Beard': {'Confidence': 99.95126342773438, 'Value': False},
 'BoundingBox': {'Height': 0.10216346383094788,
  'Left': 0.7884615659713745,
  'Top': 0.7668269276618958,
  'Width': 0.10216346383094788},
 'Confidence': 97.27440643310547,
 'Emotions': [{'Confidence': 91.88949584960938, 'Type': 'ANGRY'},
  {'Confidence': 14.306652069091797, 'Type': 'SAD'},
  {'Confidence': 5.094576358795166, 'Type': 'HAPPY'}],
 'Eyeglasses': {'Confidence': 96.83731842041016, 'Value': True},
 'EyesOpen': {'Confidence': 99.9999008178711, 'Value': False},
 'Gender': {'Confidence': 99.8965835571289, 'Value': 'Male'},
 'Landmarks': [{'Type': 'eyeLeft',
   'X': 0.8194406628608704,
   'Y': 0.8212051391601562},
  {'Type': 'eyeRight', 'X': 0.8360863924026489, 'Y': 0.8040536642074585},
  {'Type': 'nose', 'X': 0.8269283175468445, 'Y': 0.8326889276504517},
  {'Type': 'mouthLeft', 'X': 0.8483017683029175, 'Y': 0.8540371656417847},
  {'Type': 'mouthRight', 'X': 0.8588249087333679, 'Y

In [214]:
# only keep photos of one person
drop_index = []
for i in range(len(df)):
    # check string length, <3000 is one person
    if len(df['FaceDetails'].iloc[i]) > 3000:
        drop_index.append(df.index[i])

In [216]:
df = df.drop(drop_index)

In [217]:
len(df)

1202

In [218]:
df.head()

Unnamed: 0,index,GroupIndex,fileName,Labels,FaceDetails,Personality
3,3,3,Facebook/ENTJ/10100524843917367.jpg,"[{'Name': 'Human', 'Confidence': 99.3036117553...",[{'BoundingBox': {'Width': 0.43888887763023376...,ENTJ
4,4,4,Facebook/ENTJ/10100593000838573.jpg,"[{'Name': 'Human', 'Confidence': 99.3088073730...","[{'BoundingBox': {'Width': 0.2988888919353485,...",ENTJ
5,5,5,Facebook/ENTJ/10100722478231739.jpg,"[{'Name': 'People', 'Confidence': 99.175758361...","[{'BoundingBox': {'Width': 0.2266666740179062,...",ENTJ
6,6,6,Facebook/ENTJ/10100814294055092.jpg,"[{'Name': 'People', 'Confidence': 98.967948913...","[{'BoundingBox': {'Width': 0.2728365361690521,...",ENTJ
7,7,7,Facebook/ENTJ/10100874504219050.jpg,"[{'Name': 'People', 'Confidence': 99.174240112...","[{'BoundingBox': {'Width': 0.5322222113609314,...",ENTJ


In [219]:
# get smile, brightness, sharpness

SmileConfList = []
SmileValueList = []
BrightnessList = []
SharpnessList = []

for face_str in df['FaceDetails']:
    # delete [] of string
    face_str = face_str[1:-1]
    
    # convert to dict
    face_dict = ast.literal_eval(face_str)
    
    SmileConfList.append(face_dict['Smile']['Confidence'])
    SmileValueList.append(face_dict['Smile']['Value'])
    BrightnessList.append(face_dict['Quality']['Brightness'])
    SharpnessList.append(face_dict['Quality']['Sharpness'])

In [220]:
len(SmileConfList)

1202

In [221]:
len(SmileValueList)

1202

In [222]:
len(BrightnessList)

1202

In [223]:
len(SharpnessList)

1202

In [226]:
df['SmileConf'] = SmileConfList
df['SmileValue'] = SmileValueList
df['Brightness'] = BrightnessList
df['Sharpness'] = SharpnessList

In [227]:
df.head()

Unnamed: 0,index,GroupIndex,fileName,Labels,FaceDetails,Personality,SmileConf,SmileValue,Brightness,Sharpness
3,3,3,Facebook/ENTJ/10100524843917367.jpg,"[{'Name': 'Human', 'Confidence': 99.3036117553...",[{'BoundingBox': {'Width': 0.43888887763023376...,ENTJ,51.429409,True,64.833603,99.124046
4,4,4,Facebook/ENTJ/10100593000838573.jpg,"[{'Name': 'Human', 'Confidence': 99.3088073730...","[{'BoundingBox': {'Width': 0.2988888919353485,...",ENTJ,97.421249,True,44.480965,99.996712
5,5,5,Facebook/ENTJ/10100722478231739.jpg,"[{'Name': 'People', 'Confidence': 99.175758361...","[{'BoundingBox': {'Width': 0.2266666740179062,...",ENTJ,93.711639,True,47.003876,99.471344
6,6,6,Facebook/ENTJ/10100814294055092.jpg,"[{'Name': 'People', 'Confidence': 98.967948913...","[{'BoundingBox': {'Width': 0.2728365361690521,...",ENTJ,95.180687,True,38.081123,99.996712
7,7,7,Facebook/ENTJ/10100874504219050.jpg,"[{'Name': 'People', 'Confidence': 99.174240112...","[{'BoundingBox': {'Width': 0.5322222113609314,...",ENTJ,97.194908,True,38.869888,99.996712


In [230]:
df = df.drop(['GroupIndex','Labels','FaceDetails'], axis='columns')
df.head()

Unnamed: 0,index,fileName,Personality,SmileConf,SmileValue,Brightness,Sharpness
3,3,Facebook/ENTJ/10100524843917367.jpg,ENTJ,51.429409,True,64.833603,99.124046
4,4,Facebook/ENTJ/10100593000838573.jpg,ENTJ,97.421249,True,44.480965,99.996712
5,5,Facebook/ENTJ/10100722478231739.jpg,ENTJ,93.711639,True,47.003876,99.471344
6,6,Facebook/ENTJ/10100814294055092.jpg,ENTJ,95.180687,True,38.081123,99.996712
7,7,Facebook/ENTJ/10100874504219050.jpg,ENTJ,97.194908,True,38.869888,99.996712


In [231]:
df.to_csv('CleanData1.csv')