### Looking at the performance/predictions of linear regression on FAST ratings

In [2]:
# Input CSV locations (relative to the notebook's working directory).
# Both ground-truth files share the same 'landmarks_100' stem.
_LANDMARKS_STEM = 'landmarks/landmarks_100'

PREDICTIONS_FILE = 'predictions/linreg_predictions.csv'         # model output, one row per image
TRUE_RATINGS_FILE = _LANDMARKS_STEM + '.csv'                    # provides the 'rating' column
TRUE_GENDERS_FILE = _LANDMARKS_STEM + '_binarygenderlabels.csv' # provides the 'gender' column

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error
from IPython.display import display
# Notebook-wide pandas display settings: floats shown with 3 digits of precision,
# up to 999 rows/columns before truncation, and up to 100 characters per cell.
pd.set_option('display.precision', 3)
pd.set_option('display.max_rows', 999)
pd.set_option('display.max_columns', 999)
pd.set_option('display.max_colwidth', 100)

#load data from files
# Read the three CSVs in the same order as the path constants are listed.
df_predictions, df_lm_fast, df_lm_gender = (
    pd.read_csv(csv_path)
    for csv_path in (PREDICTIONS_FILE, TRUE_RATINGS_FILE, TRUE_GENDERS_FILE)
)

# Only one column is needed from each ground-truth file (both are Series, despite the df_ prefix).
df_ratings = df_lm_fast.loc[:, 'rating']
df_true_gender = df_lm_gender.loc[:, 'gender']

# Column-wise concat aligns rows by index label (here: row position in each file).
# NOTE(review): this assumes all three files list the same images in the same order — confirm.
# NOTE(review): the 'Unnamed: 0*' columns in the output look like a saved index — confirm before dropping.
df = pd.concat(objs=[df_predictions, df_ratings, df_true_gender], axis='columns')
display(df)

Unnamed: 0.1,Unnamed: 0,image_url,prediction,rating,gender
0,0,18_0_3_20170119151213151.jpg.chip,2.568,1.75,0
1,1,18_1_3_20170117135709510.jpg.chip,4.49,5.0,1
2,2,19_0_1_20170114025936564.jpg.chip,2.297,0.4,0
3,3,20_0_3_20170113133050904.jpg.chip,3.171,1.6,0
4,4,21_1_3_20170119154311378.jpg.chip,1.665,2.8,1
5,5,22_0_1_20170113193211629.jpg.chip,1.747,0.8,0
6,6,22_1_0_20170117141120129.jpg.chip,4.207,5.8,1
7,7,23_0_0_20170117144116074.jpg.chip,2.849,0.4,0
8,8,24_0_2_20170116165047009.jpg.chip,3.165,2.8,0
9,9,24_1_2_20170116173444326.jpg.chip,3.422,5.4,1


In [3]:
#Look at highest and lowest rated faces

# Columns shown side by side: the image, the model's prediction, and the rating it is compared to.
_summary_cols = ['image_url', 'prediction', 'rating']

# Ten rows with the largest predicted rating.
df_highest = df.nlargest(n=10, columns='prediction')
print('Highest rating predictions')
print(df_highest.loc[:, _summary_cols])

# Ten rows with the smallest predicted rating.
df_smallest = df.nsmallest(n=10, columns='prediction')
print('\nLowest rating predictions')
print(df_smallest.loc[:, _summary_cols])

Highest rating predictions
                            image_url  prediction  rating
12  24_1_4_20170103230137338.jpg.chip       5.087    5.20
73  35_1_0_20170113005254692.jpg.chip       4.724    4.20
40  28_1_1_20170113012030016.jpg.chip       4.717    5.20
25  26_1_1_20170117202000565.jpg.chip       4.505    4.80
1   18_1_3_20170117135709510.jpg.chip       4.490    5.00
6   22_1_0_20170117141120129.jpg.chip       4.207    5.80
80  38_1_1_20170112204552523.jpg.chip       4.187    4.80
69  35_0_1_20170113182403214.jpg.chip       4.169    1.00
43  29_0_2_20170116163356668.jpg.chip       4.151    3.00
41  28_1_1_20170113012508729.jpg.chip       4.070    4.75

Lowest rating predictions
                            image_url  prediction  rating
31  27_0_1_20170117010459432.jpg.chip       0.920     0.4
57  32_0_1_20170117140805502.jpg.chip       1.206     0.6
32  27_0_3_20170119152415944.jpg.chip       1.287     1.0
15  25_0_0_20170119171125583.jpg.chip       1.307     1.2
54  32_0_0_2017010

In [4]:
#Look at accuracy at predicting gender, using <3.5 as male (0) and >=3.5 as female (1)

# Decision boundary on the predicted rating: strictly below counts as male,
# at or above counts as female.
GENDER_THRESHOLD = 3.5

# Whole-column comparisons instead of a row-by-row df.loc[i, ...] loop: this is the
# idiomatic pandas form and does not depend on the index labels being exactly 0..n-1.
# A row with a missing prediction satisfies neither comparison, so it counts as incorrect.
predicted_male = df['prediction'] < GENDER_THRESHOLD
predicted_female = df['prediction'] >= GENDER_THRESHOLD
is_correct = (predicted_male & (df['gender'] == 0)) | (predicted_female & (df['gender'] == 1))

_sum = int(is_correct.sum())  # number of correctly classified faces
accuracy = _sum/len(df.index)

print(accuracy)

#The accuracy for logistic regression was: 0.8969072164948454
#however, these 100 faces were also part of the train set

0.7070707070707071


In [5]:
#Look at performance separately for males and females

# Male subset (gender label 0): error between true rating and prediction.
is_male = df['gender'].eq(0)
df_m = df[is_male]
mse_m = mean_squared_error(y_true=df_m['rating'], y_pred=df_m['prediction'])
print("male mean squared error: ", mse_m)

# Female subset (gender label 1).
is_female = df['gender'].eq(1)
df_f = df[is_female]
mse_f = mean_squared_error(y_true=df_f['rating'], y_pred=df_f['prediction'])
print("female mean squared error: ", mse_f)

# Every row, regardless of gender label.
mse_all = mean_squared_error(y_true=df['rating'], y_pred=df['prediction'])
print("both mean squared error: ", mse_all)

male mean squared error:  2.469165687083346
female mean squared error:  3.0928069201424777
both mean squared error:  2.7274413492593497


In [6]:
#Look at variance of scores

# Variance of the predicted ratings (pandas default: sample variance, ddof=1)
# over all rows, the male subset, and the female subset.
all_var, m_var, f_var = (
    subset['prediction'].var() for subset in (df, df_m, df_f)
)

print('variance: ', all_var,
      '\nvariance males: ', m_var,
      '\nvariance females: ', f_var)

variance:  0.8058792778806763 
variance males:  0.5725139852124772 
variance females:  0.6440391609625193
