# Predicting Justice Votes

In [1]:
# Standard imports
import numpy as np
import pandas as pd

# Models and eval
from sklearn.naive_bayes import BernoulliNB
from sklearn.naive_bayes import GaussianNB
from sklearn.naive_bayes import ComplementNB
from sklearn.naive_bayes import MultinomialNB
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import f1_score, accuracy_score
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV

# Import data
import pickle

# Scotus class object
from scotus_class import scotus

In [2]:
# Binary
f = open('adj_df.p', 'rb')
df1 = pickle.load(f)
f.close()

# Multiclass
f = open('mul_df.p', 'rb')
df2 = pickle.load(f)
f.close()

## Initial Test of Predicting Justice Opinions
In this initial test, each case a justice has produced an opinion on is a potential target variable.  All other cases in which a justice has produced an opinion are treated as features.

In [3]:
# Instantiate scotus object for references
sc_obj = scotus(df1)

In [4]:
# Select Rehnquist court(s)
sc_obj.justice_courts('Rehnquist')
print('Court Number(s):', sc_obj.j_courts)

Court Number(s): [0]


In [5]:
# Rehnquist court
rehn_court = sc_obj.courts[0]
print(rehn_court)

['Breyer', 'Ginsburg', 'Kennedy', "O'Connor", 'Rehnquist', 'Scalia', 'Souter', 'Stevens', 'Thomas']


In [6]:
# Rehnquist term
print('First Case:', sc_obj.j_cases['Rehnquist'][0])
print('Last Case:', sc_obj.j_cases['Rehnquist'][1])

First Case: 0
Last Case: 497


In [7]:
current_df = df1.loc[rehn_court, :498]
print('Missing justice opinions:', current_df.isna().sum().sum())

Missing justice opinions: 36


In [8]:
# Drop cases with missing opinions
current_df.dropna(axis=1, inplace=True)
print('Cases with no missing opinions:', len(current_df.columns))

Cases with no missing opinions: 465


In [28]:
# Set X, y helper function
def get_xy(df, justice, case):
    X = df.drop(justice, axis=0).drop(case, axis=1)
    y = df.drop(justice, axis=0)[case]
    return X, y
    
# Prediction function
def predict_cases(df, justice):
    cases = list(df.columns)
    preds = []
    for case in cases:
        clf = BernoulliNB()
        X, y = get_xy(df, justice, case)
        clf.fit(X, y)
        pred = int(clf.predict(np.array(df.loc[justice].drop(case)).reshape(1, len(cases)-1)))
        preds.append(pred)
    return np.array(preds)

In [29]:
justice_preds = {}
for justice in rehn_court:
    y_preds = list(predict_cases(current_df, justice))
    y_true = list(current_df.loc[justice])
    justice_preds[justice] = y_preds
    print('Justice:', justice)
    print('-'*30)
    print('F1-Score:', f1_score(y_true, y_preds, average='weighted'))
    print('Accuracy:', accuracy_score(y_true, y_preds))
    print('-'*30)

Justice: Breyer
------------------------------
F1-Score: 0.8683445586671392
Accuracy: 0.864516129032258
------------------------------
Justice: Ginsburg
------------------------------
F1-Score: 0.9099865620038081
Accuracy: 0.9096774193548387
------------------------------
Justice: Kennedy
------------------------------
F1-Score: 0.8736929673412142
Accuracy: 0.8688172043010752
------------------------------
Justice: O'Connor
------------------------------
F1-Score: 0.8499319410004172
Accuracy: 0.843010752688172
------------------------------
Justice: Rehnquist
------------------------------
F1-Score: 0.8866067440342936
Accuracy: 0.8795698924731182
------------------------------
Justice: Scalia
------------------------------
F1-Score: 0.8939452261318046
Accuracy: 0.8924731182795699
------------------------------
Justice: Souter
------------------------------
F1-Score: 0.8953302800711384
Accuracy: 0.8924731182795699
------------------------------
Justice: Stevens
-------------------------

In [30]:
# Mean opinion
current_df.sum().sum() / (9*len(current_df.columns))

0.8148148148148148

In [31]:
# Predicted mean opinion
pd.DataFrame(justice_preds).T.sum().sum() / (9*len(current_df.columns))

0.7913978494623656

In [32]:
pred_sim = pd.DataFrame(index=current_df.index, columns=current_df.index)
for justice1 in list(current_df.index):
    sim = []
    for justice2 in list(current_df.index):
        X = np.array(justice_preds[justice1]).reshape(1,len(justice_preds[justice1]))
        Y = np.array(justice_preds[justice2]).reshape(1,len(justice_preds[justice2]))
        sim.append(float(cosine_similarity(X, Y)))
    pred_sim[justice1] = sim

In [33]:
actual = pd.DataFrame(sc_obj.sim_matrix(), index=df1.index, columns=df1.index)
actual = actual.loc[list(current_df.index)][list(current_df.index)]

In [None]:
pred_sim.mean().mean()

In [None]:
actual.mean().mean()

In [10]:
case_df = current_df.T

In [38]:
rf_preds = {}
for justice in list(current_df.index):
    X = case_df.drop(columns=justice)
    y = case_df[justice]
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

    rf = RandomForestClassifier()
    params = {
        'n_estimators': [15, 20, 25],
        'max_depth': [2, 3, 4, 5, 6],
        'criterion': ['gini', 'entropy'],
        'class_weight': [None, 'balanced', 'balanced_subsample'],
    }

    clf = GridSearchCV(rf, params, scoring='f1', n_jobs=-1, cv=5)
    clf.fit(X_train, y_train)
    clf_model = clf.best_estimator_
    preds = clf_model.predict(X_test)
    f1 = f1_score(y_test, preds)
    acc = accuracy_score(y_test, preds)
    print(30*'-')
    print('Justice:', justice)
    print(30*'-')
    print('F1:', f1)
    print('Accuracy:', acc)
    
    preds = []
    for case in case_df.drop(columns=justice).index:
        case = np.array(case_df.drop(columns=justice).loc[case]).reshape(1, 8)
        pred = clf_model.predict(case)
        preds.append(int(pred))
    
    rf_preds[justice] = preds



------------------------------
Justice: Breyer
------------------------------
F1: 0.9290322580645162
Accuracy: 0.8817204301075269




------------------------------
Justice: Ginsburg
------------------------------
F1: 0.9466666666666667
Accuracy: 0.9139784946236559




------------------------------
Justice: Kennedy
------------------------------
F1: 0.937142857142857
Accuracy: 0.8817204301075269




------------------------------
Justice: O'Connor
------------------------------
F1: 0.9371428571428572
Accuracy: 0.8817204301075269




------------------------------
Justice: Rehnquist
------------------------------
F1: 0.9487179487179487
Accuracy: 0.9139784946236559
------------------------------
Justice: Scalia
------------------------------
F1: 0.912751677852349
Accuracy: 0.8602150537634409
------------------------------
Justice: Souter
------------------------------
F1: 0.9426751592356688
Accuracy: 0.9032258064516129
------------------------------
Justice: Stevens
------------------------------
F1: 0.8936170212765958
Accuracy: 0.8387096774193549
------------------------------
Justice: Thomas
------------------------------
F1: 0.9041095890410958
Accuracy: 0.8494623655913979


In [39]:
rf_sim = pd.DataFrame(index=current_df.index, columns=current_df.index)
for justice1 in list(current_df.index):
    sim = []
    for justice2 in list(current_df.index):
        X = np.array(rf_preds[justice1]).reshape(1,len(rf_preds[justice1]))
        Y = np.array(rf_preds[justice2]).reshape(1,len(rf_preds[justice2]))
        sim.append(float(cosine_similarity(X, Y)))
    rf_sim[justice1] = sim

In [34]:
pred_sim

Unnamed: 0,Breyer,Ginsburg,Kennedy,O'Connor,Rehnquist,Scalia,Souter,Stevens,Thomas
Breyer,1.0,0.958333,0.77768,0.807197,0.730052,0.711303,0.957005,0.970432,0.700478
Ginsburg,0.958333,1.0,0.772389,0.802022,0.724543,0.702935,0.962553,0.976107,0.692039
Kennedy,0.77768,0.772389,1.0,0.970682,0.9418,0.942972,0.776602,0.751173,0.937604
O'Connor,0.807197,0.802022,0.970682,1.0,0.928847,0.919698,0.811245,0.787558,0.914424
Rehnquist,0.730052,0.724543,0.9418,0.928847,1.0,0.968263,0.723538,0.700728,0.965344
Scalia,0.711303,0.702935,0.942972,0.919698,0.968263,1.0,0.704746,0.678162,0.968961
Souter,0.957005,0.962553,0.776602,0.811245,0.723538,0.704746,1.0,0.969087,0.699507
Stevens,0.970432,0.976107,0.751173,0.787558,0.700728,0.678162,0.969087,1.0,0.666691
Thomas,0.700478,0.692039,0.937604,0.914424,0.965344,0.968961,0.699507,0.666691,1.0


In [35]:
actual

Unnamed: 0,Breyer,Ginsburg,Kennedy,O'Connor,Rehnquist,Scalia,Souter,Stevens,Thomas
Breyer,1.0,0.9159,0.8643,0.8798,0.8173,0.7816,0.907,0.8794,0.7648
Ginsburg,0.9159,1.0,0.8414,0.8477,0.7925,0.7733,0.9297,0.892,0.7544
Kennedy,0.8643,0.8414,1.0,0.9075,0.9337,0.889,0.8327,0.8026,0.8776
O'Connor,0.8798,0.8477,0.9075,1.0,0.9182,0.8692,0.8656,0.8239,0.8674
Rehnquist,0.8173,0.7925,0.9337,0.9182,1.0,0.9012,0.7992,0.7536,0.9108
Scalia,0.7816,0.7733,0.889,0.8692,0.9012,1.0,0.7693,0.7272,0.9394
Souter,0.907,0.9297,0.8327,0.8656,0.7992,0.7693,1.0,0.8964,0.7651
Stevens,0.8794,0.892,0.8026,0.8239,0.7536,0.7272,0.8964,1.0,0.7264
Thomas,0.7648,0.7544,0.8776,0.8674,0.9108,0.9394,0.7651,0.7264,1.0


In [27]:
rf_sim

Unnamed: 0,Breyer,Ginsburg,Kennedy,O'Connor,Rehnquist,Scalia,Souter,Stevens,Thomas
Breyer,1.0,0.957146,0.893899,0.905182,0.828878,0.766471,0.955298,0.931241,0.747317
Ginsburg,0.957146,1.0,0.864039,0.876209,0.79102,0.725601,0.947093,0.942343,0.721679
Kennedy,0.893899,0.864039,1.0,0.991361,0.943722,0.882727,0.914461,0.860234,0.870248
O'Connor,0.905182,0.876209,0.991361,1.0,0.940144,0.889605,0.925156,0.87252,0.877435
Rehnquist,0.828878,0.79102,0.943722,0.940144,1.0,0.917959,0.850542,0.7865,0.907229
Scalia,0.766471,0.725601,0.882727,0.889605,0.917959,1.0,0.786504,0.725898,0.956014
Souter,0.955298,0.947093,0.914461,0.925156,0.850542,0.786504,1.0,0.935113,0.770921
Stevens,0.931241,0.942343,0.860234,0.87252,0.7865,0.725898,0.935113,1.0,0.710685
Thomas,0.747317,0.721679,0.870248,0.877435,0.907229,0.956014,0.770921,0.710685,1.0


In [40]:
rf_preds['Stevens']

[1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 0,
 1,
 1,
 1,
 1,
 0,
 1,
 0,
 1,
 0,
 1,
 1,
 1,
 1,
 1,
 0,
 1,
 1,
 1,
 1,
 0,
 1,
 0,
 1,
 1,
 1,
 0,
 1,
 0,
 1,
 0,
 1,
 1,
 1,
 1,
 1,
 0,
 1,
 0,
 0,
 1,
 1,
 1,
 0,
 0,
 1,
 1,
 0,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 0,
 1,
 1,
 0,
 1,
 0,
 1,
 1,
 1,
 1,
 0,
 1,
 0,
 1,
 0,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 0,
 0,
 0,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 0,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 0,
 1,
 0,
 1,
 1,
 1,
 1,
 1,
 0,
 0,
 0,
 0,
 1,
 0,
 1,
 1,
 1,
 1,
 0,
 1,
 1,
 0,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 0,
 0,
 1,
 1,
 0,
 1,
 1,
 1,
 1,
 1,
 1,
 0,
 1,
 1,
 0,
 0,
 0,
 1,
 1,
 1,
 0,
 0,
 1,
 1,
 1,
 0,
 1,
 0,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 0,
 1,
 1,
 1,
 0,
 1,
 1,
 1,
 0,
 1,
 0,
 0,
 1,
 1,
 1,
 1,
 1,
 1,
 0,
 1,
 0,
 1,
 1,
 1,
 1,
 1,
 1,
 0,
 1,
 1,
 1,
 1,
 0,
 1,
 1,
 1,
 1,
 1,
 1,
 0,
 1,
 1,
 1,
 0,
 1,
 1,
 1,
 1,
 1,
 1,
 0,
 1,
 1,
 1,
 0,
 1,
 0,
 0,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,


In [None]:
nb_clf = BernoulliNB()
nb_clf.fit(X_train, y_train)
nb_preds = nb_clf.predict(X_test)
f1 = f1_score(y_test, nb_preds)
acc = accuracy_score(y_test, nb_preds)
print('F1:', f1)
print('Accuracy:', acc)