# Predicting Justice Votes

In [40]:
# Standard imports
import numpy as np
import pandas as pd

# Models
from sklearn.naive_bayes import BernoulliNB

# Import data
import pickle

# Scotus class object
from scotus_class import scotus

In [9]:
# Binary
# NOTE(review): pickle.load can execute arbitrary code while deserialising;
# only load these files if they come from a trusted source.
# `with` guarantees the file handle is closed even if unpickling raises.
with open('adj_df.p', 'rb') as f:
    df1 = pickle.load(f)

# Multiclass
with open('mul_df.p', 'rb') as f:
    df2 = pickle.load(f)

## Initial Test of Predicting Justice Opinions
In this initial test, each case on which a justice has produced an opinion is a potential target variable. Justice Rehnquist, being the first justice to retire among the justices evaluated, is the test case.

In [41]:
# Instantiate scotus object for references
# Built from df1 (the "Binary" frame); the cells below read court membership
# and per-justice case ranges from this object.
sc_obj = scotus(df1)

In [53]:
# Select Rehnquist
# The return value of justice_courts is not used; the result is read back from
# sc_obj.j_courts (presumably populated by the call — confirm in scotus_class).
sc_obj.justice_courts('Rehnquist')
print('Court Number(s):', sc_obj.j_courts)

Court Number(s): [0]


In [55]:
# Rehnquist court
# Index 0 is the court number reported for Rehnquist in the previous cell.
# The entry is a list of justice names, used later as row labels into df1.
rehn_court = sc_obj.courts[0]
print(rehn_court)

['Breyer', 'Ginsburg', 'Kennedy', "O'Connor", 'Rehnquist', 'Scalia', 'Souter', 'Stevens', 'Thomas']


In [83]:
# Rehnquist term
# j_cases['Rehnquist'] is read as a pair: position 0 is shown as the first
# case and position 1 as the last case.
print('First Case:', sc_obj.j_cases['Rehnquist'][0])
print('Last Case:', sc_obj.j_cases['Rehnquist'][1])

First Case: 0
Last Case: 497


In [77]:
# Restrict df1 to the Rehnquist-court justices (rows) and to the case columns
# up to label 498, then count the missing entries in that slice.
# NOTE(review): .loc label slices include the end label, so `:498` also selects
# column 498 if it exists — one past the 'Last Case' of 497 printed above.
# Confirm whether 497 or 498 is the intended upper bound.
current_df = df1.loc[rehn_court, :498]
print('Missing justice opinions:', current_df.isna().sum().sum())

Missing justice opinions: 36


In [82]:
# Drop cases with missing opinions
# Reassign instead of dropna(inplace=True): current_df was sliced out of df1,
# and in-place edits on such a slice can trigger SettingWithCopyWarning.
# axis=1 removes every case (column) in which any justice has a missing value.
current_df = current_df.dropna(axis=1)
print('Cases with no missing opinions:', len(current_df.columns))

Cases with no missing opinions: 465


In [100]:
# Set X, y helper function
def get_xy(df, justice, case):
    """Build the training features and target for one held-out justice and case.

    The row labelled `justice` is removed from `df`. From the remaining rows:
      * X is every column except `case`;
      * y is the `case` column.

    `df` itself is not modified. Returns the tuple ``(X, y)``.
    """
    others = df.drop(index=justice)
    features = others.drop(columns=case)
    target = others[case]
    return features, target
    
# Prediction function
def predict_cases(df, justice):
    """Predict `justice`'s value for every case in `df`, one case at a time.

    For each column (case), a fresh BernoulliNB is fit on the output of
    get_xy: the other justices are the samples, their values on all other
    cases are the features, and their values on the held-out case are the
    labels. The fitted model then predicts the held-out case for `justice`
    from that justice's values on the other cases.

    Parameters
    ----------
    df : pandas.DataFrame
        Rows labelled by justice, columns labelled by case. Assumed to be free
        of missing values (the notebook drops incomplete cases beforehand).
    justice : label
        Row label in `df` of the justice to predict.

    Returns
    -------
    numpy.ndarray
        One integer prediction per column of `df`, in column order.
    """
    # The justice's own row does not change between iterations; look it up once.
    own_votes = df.loc[justice]
    preds = []
    for case in df.columns:
        clf = BernoulliNB()
        X, y = get_xy(df, justice, case)
        clf.fit(X, y)
        # Single sample: this justice's values on every case except `case`.
        sample = own_votes.drop(case).to_numpy().reshape(1, -1)
        # predict() returns a length-1 array. Index it before int(), because
        # converting a non-0-d array with int() is deprecated in NumPy >= 1.25.
        preds.append(int(clf.predict(sample)[0]))
    return np.array(preds)

In [154]:
# Score the leave-one-case-out predictions for every justice on the court.
for justice in rehn_court:
    preds = predict_cases(current_df, justice)
    real = current_df.loc[justice].to_numpy()
    # pred - real per case: 0 = correct, -1 = false negative, 1 = false positive
    diff = preds - real
    n_cases = len(diff)
    # Normalise by this justice's own number of cases (the original referenced
    # an undefined leftover name here).
    mse = (diff ** 2).sum() / n_cases
    # Count each outcome directly so an outcome that never occurs yields 0
    # instead of a KeyError from value_counts()[key].
    correct = int((diff == 0).sum())
    false_neg = int((diff == -1).sum())  # Joined majority but predicted dissent
    false_pos = int((diff == 1).sum())  # Dissent but predicted join majority
    true_pos = int(((preds == 1) & (real == 1)).sum())
    # Guard the ratios: with no positive predictions / no positive labels the
    # denominators are zero; report 0.0 rather than raising.
    predicted_pos = true_pos + false_pos
    actual_pos = true_pos + false_neg
    precision = true_pos / predicted_pos if predicted_pos else 0.0
    recall = true_pos / actual_pos if actual_pos else 0.0
    if precision + recall:
        f1 = 2 * (precision*recall) / (precision+recall)
    else:
        f1 = 0.0
    acc = correct / n_cases
    print('Justice:', justice)
    print('-'*30)
    print('MSE:', mse)
    print('F1-Score:', f1)
    print('Accuracy:', acc)
    print('-'*30)

Justice: Breyer
------------------------------
MSE: 0.13548387096774195
F1-Score: 0.9142857142857143
Accuracy: 0.864516129032258
------------------------------
Justice: Ginsburg
------------------------------
MSE: 0.09032258064516129
F1-Score: 0.9418282548476455
Accuracy: 0.9096774193548387
------------------------------
Justice: Kennedy
------------------------------
MSE: 0.13118279569892474
F1-Score: 0.9242236024844721
Accuracy: 0.8688172043010752
------------------------------
Justice: O'Connor
------------------------------
MSE: 0.15698924731182795
F1-Score: 0.9129916567342073
Accuracy: 0.843010752688172
------------------------------
Justice: Rehnquist
------------------------------
MSE: 0.12043010752688173
F1-Score: 0.9261213720316623
Accuracy: 0.8795698924731182
------------------------------
Justice: Scalia
------------------------------
MSE: 0.10752688172043011
F1-Score: 0.9307479224376731
Accuracy: 0.8924731182795699
------------------------------
Justice: Souter
------------

In [160]:
# Recorded values for Stevens on the first 22 retained cases, shown for
# comparison with the predictions in the next cell.
current_df.loc['Stevens'][:22]

0     0.0
1     0.0
2     0.0
3     0.0
4     0.0
5     1.0
6     0.0
7     1.0
8     1.0
9     0.0
10    1.0
11    1.0
12    1.0
13    0.0
14    1.0
15    1.0
16    1.0
17    1.0
18    0.0
19    1.0
20    0.0
21    1.0
Name: Stevens, dtype: float64

In [161]:
# Predicted values for Stevens on the same first 22 cases (re-runs the full
# per-case fit, then slices the resulting array).
predict_cases(current_df, 'Stevens')[:22]

array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 0, 1])