### Looking at the performance/predictions of linear regression on FAST ratings

In [1]:
# Input CSV files (relative paths, resolved against the notebook's working directory).
# Linear-regression output; read into df_predictions and supplies the 'prediction' column.
PREDICTIONS_FILE = 'predictions/linreg_predictions.csv'
# Ground-truth ratings; only its 'FAST_rating' column is used.
TRUE_RATINGS_FILE = 'landmarks/FAME_landmarks_100.csv'
# Ground-truth binary gender labels; only its 'gender' column is used.
TRUE_GENDERS_FILE = 'landmarks/landmarks_100_binarygenderlabels.csv'

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error
from IPython.display import display
# Notebook-wide pandas display settings: 3 significant decimals, and limits
# high enough that the frames shown below are not truncated.
for option_name, option_value in (
    ('display.precision', 3),
    ('display.max_rows', 999),
    ('display.max_columns', 999),
    ('display.max_colwidth', 100),
):
    pd.set_option(option_name, option_value)

#load data from files
df_predictions = pd.read_csv(PREDICTIONS_FILE)
df_lm_fast = pd.read_csv(TRUE_RATINGS_FILE)
df_lm_gender = pd.read_csv(TRUE_GENDERS_FILE)

# pd.concat(axis=1) aligns rows on the index, and every frame here carries
# read_csv's default positional index. If the files differ in length the
# shorter ones would be silently padded with NaN, so fail loudly instead.
# NOTE(review): this join also assumes the three files list the same faces in
# the same order -- that cannot be checked from row counts alone.
row_counts = {
    PREDICTIONS_FILE: len(df_predictions),
    TRUE_RATINGS_FILE: len(df_lm_fast),
    TRUE_GENDERS_FILE: len(df_lm_gender),
}
if len(set(row_counts.values())) != 1:
    raise ValueError(
        'Input files have different row counts and cannot be joined row-wise: '
        + str(row_counts)
    )

# Keep only the ground-truth columns needed for the comparison.
df_ratings = df_lm_fast['FAST_rating']
df_true_gender = df_lm_gender['gender']

# One row per face: prediction columns, true rating, true gender.
df = pd.concat([df_predictions, df_ratings, df_true_gender], axis = 1)
display(df)

Unnamed: 0.1,Unnamed: 0,image_url,prediction,FAST_rating,gender
0,0,http://snapdocket.com/fast/18_0_3_20170119151213151.jpg.chip.jpg,2.698,1.75,0
1,1,http://snapdocket.com/fast/18_1_3_20170117135709510.jpg.chip.jpg,4.321,5.0,1
2,2,http://snapdocket.com/fast/20_0_3_20170113133050904.jpg.chip.jpg,3.495,1.6,0
3,3,http://snapdocket.com/fast/21_1_3_20170119154311378.jpg.chip.jpg,2.162,2.8,1
4,4,http://snapdocket.com/fast/22_0_1_20170113193211629.jpg.chip.jpg,2.093,0.8,0
5,5,http://snapdocket.com/fast/22_1_0_20170117141120129.jpg.chip.jpg,4.764,5.8,1
6,6,http://snapdocket.com/fast/23_0_0_20170117144116074.jpg.chip.jpg,3.085,0.4,0
7,7,http://snapdocket.com/fast/24_0_2_20170116165047009.jpg.chip.jpg,3.626,2.8,0
8,8,http://snapdocket.com/fast/24_1_2_20170116173444326.jpg.chip.jpg,4.17,5.4,1
9,9,http://snapdocket.com/fast/24_1_2_20170116174612535.jpg.chip.jpg,3.819,5.2,1


In [2]:
#Inspect the ten faces at each extreme of the predicted rating

shown_columns = ['image_url', 'prediction', 'FAST_rating']

# Top ten rows by predicted rating, shown next to the true rating.
df_highest = df.nlargest(n=10, columns='prediction')
print('Highest rating predictions')
print(df_highest.loc[:, shown_columns])

# Bottom ten rows by predicted rating.
df_smallest = df.nsmallest(n=10, columns='prediction')
print('\nLowest rating predictions')
print(df_smallest.loc[:, shown_columns])

Highest rating predictions
                                                           image_url  \
24  http://snapdocket.com/fast/26_1_1_20170117202000565.jpg.chip.jpg   
11  http://snapdocket.com/fast/24_1_4_20170103230137338.jpg.chip.jpg   
61  http://snapdocket.com/fast/34_1_1_20170117132708007.jpg.chip.jpg   
5   http://snapdocket.com/fast/22_1_0_20170117141120129.jpg.chip.jpg   
26  http://snapdocket.com/fast/26_1_2_20170116184248819.jpg.chip.jpg   
27  http://snapdocket.com/fast/26_1_2_20170116234746730.jpg.chip.jpg   
22  http://snapdocket.com/fast/26_1_1_20170116234800903.jpg.chip.jpg   
23  http://snapdocket.com/fast/26_1_1_20170117134025403.jpg.chip.jpg   
45  http://snapdocket.com/fast/29_1_1_20170117194236505.jpg.chip.jpg   
58  http://snapdocket.com/fast/32_1_2_20170116181306891.jpg.chip.jpg   

    prediction  FAST_rating  
24       5.102        4.800  
11       4.878        5.200  
61       4.849        6.000  
5        4.764        5.800  
26       4.727        5.800  


In [3]:
#Look at accuracy at predicting gender from the predicted rating:
#a prediction below GENDER_THRESHOLD counts as male (gender == 0),
#a prediction at or above it counts as female (gender == 1)
GENDER_THRESHOLD = 3.5

predicted_male = df['prediction'] < GENDER_THRESHOLD
predicted_female = df['prediction'] >= GENDER_THRESHOLD

# Vectorised check instead of a per-row df.loc[i, ...] loop: the loop used
# positional integers as index labels and so only worked on a 0..n-1 index.
# Rows with a NaN prediction or a gender other than 0/1 match neither branch
# and are counted as incorrect, exactly as in the loop version.
is_correct = (predicted_male & (df['gender'] == 0)) | (predicted_female & (df['gender'] == 1))

_sum = int(is_correct.sum())
accuracy = _sum/len(df.index)

print(accuracy)

#The accuracy for logistic regression was: 0.8969072164948454
#however, these 100 faces were also part of the train set

0.8350515463917526


In [4]:
#Mean squared error of the predicted rating, per gender and overall
#(gender label 0 = male, 1 = female)

male_rows = df['gender'] == 0
df_m = df[male_rows]
mse_m = mean_squared_error(y_true=df_m['FAST_rating'], y_pred=df_m['prediction'])
print("male mean squared error: ", mse_m)

female_rows = df['gender'] == 1
df_f = df[female_rows]
mse_f = mean_squared_error(y_true=df_f['FAST_rating'], y_pred=df_f['prediction'])
print("female mean squared error: ", mse_f)

# Every row, regardless of gender label.
mse_all = mean_squared_error(y_true=df['FAST_rating'], y_pred=df['prediction'])
print("both mean squared error: ", mse_all)

male mean squared error:  1.5691697792706605
female mean squared error:  2.0290806034161144
both mean squared error:  1.7588237273718785
