In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

%matplotlib inline

In [2]:
#Loading predictions and original scores from both models
single_scaler = pd.read_csv('..\data\interim\single_scaler_results.csv', index_col=0)
essay_dependent_scaler = pd.read_csv('..\data\interim\essay_dependent_scaler_results.csv', index_col=0)

In [3]:
#Creating residual columns
single_scaler['residuals_scaled'] = single_scaler.orig_scaled - single_scaler.pred_scaled
single_scaler['residuals_unscaled'] = single_scaler.orig_unscaled - single_scaler.pred_unscaled

essay_dependent_scaler['residuals_scaled'] = essay_dependent_scaler.orig_scaled - essay_dependent_scaler.pred_scaled
essay_dependent_scaler['residuals_unscaled'] = essay_dependent_scaler.orig_unscaled - essay_dependent_scaler.pred_unscaled

In [7]:
single_scaler.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 3244 entries, 6970 to 996
Data columns (total 7 columns):
essay_set             3244 non-null int64
pred_scaled           3244 non-null float64
pred_unscaled         3244 non-null float64
orig_scaled           3244 non-null float64
orig_unscaled         3244 non-null float64
residuals_scaled      3244 non-null float64
residuals_unscaled    3244 non-null float64
dtypes: float64(6), int64(1)
memory usage: 362.8 KB


In [5]:
essay_dependent_scaler.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 3244 entries, 6970 to 996
Data columns (total 7 columns):
essay_set             3244 non-null int64
pred_scaled           3244 non-null float64
pred_unscaled         3244 non-null float64
orig_scaled           3244 non-null float64
orig_unscaled         3244 non-null float64
residuals_scaled      3244 non-null float64
residuals_unscaled    3244 non-null float64
dtypes: float64(6), int64(1)
memory usage: 202.8 KB


In [None]:
sns.scatterplot(x='orig_unscaled', y='pred_unscaled', data=single_scaler, 
                hue='essay_set', legend='full', palette = sns.color_palette('Set1', n_colors=8))
plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
plt.xlabel('Original Scores\n(scaled)')
plt.ylabel('Residuals\n(scaled)')
plt.title('Single-scaler Model')
plt.savefig('figures/single_residuals.png', bbox_inches='tight')

In [None]:
sns.scatterplot(x='orig_unscaled', y='pred_unscaled', data=essay_dependent_scaler, 
                hue='essay_set', legend='full', palette = sns.color_palette('Set1', n_colors=8))
plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
plt.xlabel('Original Scores\n(scaled)')
plt.ylabel('Residuals\n(scaled)')
plt.title('Essay-dependent scaler Model')
plt.savefig('figures/dependent_residuals.png', bbox_inches='tight')

In [9]:
max_essay_score = {1:12, 2:6, 3:3, 4:3, 5:4, 6:4, 7:30, 8:60}

def within_percent(row,percent):
    essay_set = row.essay_set
    max_score = max_essay_score[essay_set]

    actual = row.orig_unscaled
    prediction = row.pred_unscaled
    
    if abs(actual-prediction)<percent/100*max_score:
        return 1
    else:
        return 0
    

In [10]:
percent=20
single_scaler['within'] = single_scaler.apply(within_percent, percent=percent, axis=1)
essay_dependent_scaler['within'] = essay_dependent_scaler.apply(within_percent, percent=percent, axis=1)

In [6]:
single_scaler.groupby('essay_set').mean().within

AttributeError: 'DataFrame' object has no attribute 'within'

In [None]:
essay_dependent_scaler.groupby('essay_set').mean().within

In [None]:
single_scaler.describe()

In [None]:
essay_dependent_scaler.describe()

In [None]:
max_essay_score = {1:12, 2:6, 3:3, 4:3, 5:4, 6:4, 7:30, 8:60}

def outside_percent(row,percent):
    essay_set = row.essay_set
    max_score = max_essay_score[essay_set]
    
    actual = row.orig_unscaled
    prediction = row.pred_unscaled
    
    if abs(actual-prediction)>percent/100*max_score:
        return 1
    else:
        return 0
    
percent=20
single_scaler['outside'] = single_scaler.apply(outside_percent, percent=percent, axis=1)
essay_dependent_scaler['outside'] = essay_dependent_scaler.apply(outside_percent, percent=percent, axis=1)