In [1]:
import pickle

import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from sklearn.decomposition import PCA
from sklearn.metrics.pairwise import cosine_similarity, manhattan_distances, euclidean_distances

In [2]:
# NOTE(review): pickle.load can execute arbitrary code while deserialising;
# only load these files if they come from a trusted source.

# Binary
with open('adj_df.p', 'rb') as f:  # `with` closes the file even if loading fails
    df1 = pickle.load(f)

# Multiclass
with open('mul_df.p', 'rb') as f:
    df2 = pickle.load(f)

In [66]:
# Create class object encapsulating relevant data and methods
class scotus(object):
    '''
    Wrapper around a justice-by-case DataFrame.

    The rows of ``df`` are treated as justices (``df.index``) and the columns
    as cases; a NaN cell means the justice has no value for that case.

    Attributes set by ``__init__``:
        df              -- the DataFrame passed in
        justices        -- list of row labels
        j_cases         -- {justice: (first case label, last case label)}
        courts          -- list of courts, each a list of justice names
        current_justice -- justice chosen with ``set_justice`` (None until set)
        j_courts        -- courts containing ``current_justice`` (None until set)
        comps           -- per-court 2-D projections from ``_two_dims`` (None until run)
    '''

    def __init__(self, df):
        self.df = df
        self.justices = list(df.index)
        self.current_justice = None
        self.j_courts = None
        self.comps = None

        # Case ranges: smallest and largest case label with a non-NaN value
        self.j_cases = {}
        for j in self.justices:
            cases = self.df.loc[j].dropna().index
            self.j_cases[j] = (min(cases), max(cases))

        first = min(self.df.columns)
        last = max(self.df.columns)
        cs = self.j_cases

        # Initial court: every justice whose range starts at the first case
        court = [j for j in self.justices if cs[j][0] == first]

        # Justices who join later / stop before the last case, in label order
        starts = sorted(
            ((j, cs[j][0]) for j in self.justices if cs[j][0] != first),
            key=lambda x: x[1],
        )
        leaving = sorted(
            ((j, cs[j][1]) for j in self.justices if cs[j][1] < last),
            key=lambda x: x[1],
        )

        # The i-th departure is paired with the i-th arrival, so the two
        # lists must have the same length.
        assert len(leaving) == len(starts)

        # Build the courts by applying one replacement at a time
        self.courts = [court.copy()]
        for (departing, _), (arriving, _) in zip(leaving, starts):
            court.remove(departing)
            court.append(arriving)
            self.courts.append(court.copy())

    def __len__(self):
        '''
        Number of cases (columns of the DataFrame)
        '''
        return len(self.df.columns)

    def __repr__(self):
        return f'Justices: {len(self.justices)}\nCases: {len(self.df.columns)}\nCourts: {len(self.courts)}'

    def __str__(self):
        # Same summary as __repr__; kept explicit so str() and repr() agree.
        return self.__repr__()

    def get_court(self, court):
        '''
        Return the rows of the DataFrame for the justices in `court`,
        keeping only the cases (columns) where none of them is NaN
        '''

        return self.df.loc[court].dropna(axis=1)

    def sim_matrix(self, all_justices=True):
        '''
        Return cosine similarities between justices.

        all_justices=True:  one (n, n) Numpy array over every justice, each
                            pair compared on the cases both have values for;
                            NaN where a pair shares no case.
        all_justices=False: list with one matrix (from get_sim_mat) per court
                            in self.j_courts; requires set_justice first.
        '''
        if all_justices:
            jus = list(self.justices)
            l = len(jus)
            sim_mat = np.zeros((l, l))

            for i in range(l):
                row_a = self.df.loc[jus[i]]
                has_a = row_a.notna()
                for j in range(l):
                    row_b = self.df.loc[jus[j]]
                    # Cases where both justices have a value
                    shared = has_a & row_b.notna()
                    if not shared.any():
                        sim_mat[i, j] = np.nan
                        continue
                    j_a = np.array(row_a[shared]).reshape(1, -1)
                    j_b = np.array(row_b[shared]).reshape(1, -1)
                    # cosine_similarity returns a (1, 1) array; take the scalar
                    # explicitly rather than relying on implicit conversion.
                    sim_mat[i, j] = np.round(cosine_similarity(j_a, j_b)[0, 0], 4)

            return sim_mat

        else:
            assert self.j_courts, 'Set justice'
            court_sims = []
            for court in self.j_courts:
                df = self.get_court(court)
                sim_mat = get_sim_mat(df)
                court_sims.append(sim_mat)

            return court_sims

    def _two_dim_justices(self, justices):
        '''
        2-component PCA of the given justices over the cases they all share,
        returns DataFrame with columns x, y and justice
        '''
        temp_df = self.get_court(justices)
        pca = PCA(n_components=2)
        comp = pd.DataFrame(pca.fit_transform(temp_df.values), columns=['x', 'y'])
        comp['justice'] = temp_df.index

        return comp

    def two_dim_court(self, court_num):
        '''
        2-component representation of justices within a particular court (using PCA), returns DataFrame
        '''

        assert court_num in range(0, len(self.courts)), f'Choose int from 0-{len(self.courts)-1}'

        return self._two_dim_justices(self.courts[court_num])

    def set_justice(self, justice):
        '''
        Set the current justice, store the courts that include them in
        self.j_courts and print those courts
        '''

        assert justice in self.justices, f'Not a justice between 1999-2019.\nChoose one of {self.justices}'
        self.current_justice = justice
        self.j_courts = [ court for court in self.courts if justice in court ]
        print(f'Courts Justice {justice} has served on:')
        for court in self.j_courts:
            print(court)

    def _two_dims(self, courts=None):
        '''
        Store a 2-component PCA DataFrame per court in self.comps.

        courts -- list of courts (each a list of justice names); when None,
                  the courts of the current justice (self.j_courts) are used,
                  which requires set_justice to have been called.
        '''
        if courts is None:
            assert self.j_courts, 'Set justice'
            courts = self.j_courts
        self.comps = [ self._two_dim_justices(court) for court in courts ]

def get_sim_mat(df):
    '''
    Pairwise cosine similarity between the rows of `df`, rounded to 4 places.

    df -- DataFrame with one row per justice and no NaN values (e.g. the
          output of scotus.get_court). Any number of rows is accepted; the
          size is taken from the frame rather than fixed at 9.

    Returns an (n, n) float Numpy array where entry [i][j] compares row i
    with row j; an empty (0, 0) array when `df` has no rows.
    '''
    n = len(df.index)
    if n == 0:
        return np.zeros((0, 0))

    # With a single argument cosine_similarity compares every row against
    # every other row, which replaces the explicit double loop.
    sims = cosine_similarity(df.to_numpy())
    return np.round(np.asarray(sims, dtype=float), 4)

In [67]:
# Build the analysis object from df1, the "Binary" frame loaded from adj_df.p
sc_obj = scotus(df1)

In [68]:
# Select Alito: stores the courts that include him in sc_obj.j_courts and prints them
sc_obj.set_justice('Alito')

Courts Justice Alito has served on:
['Breyer', 'Ginsburg', 'Kennedy', 'Scalia', 'Souter', 'Stevens', 'Thomas', 'Roberts', 'Alito']
['Breyer', 'Ginsburg', 'Kennedy', 'Scalia', 'Stevens', 'Thomas', 'Roberts', 'Alito', 'Sotomayor']
['Breyer', 'Ginsburg', 'Kennedy', 'Scalia', 'Thomas', 'Roberts', 'Alito', 'Sotomayor', 'Kagan']
['Breyer', 'Ginsburg', 'Kennedy', 'Thomas', 'Roberts', 'Alito', 'Sotomayor', 'Kagan', 'Gorsuch']
['Breyer', 'Ginsburg', 'Thomas', 'Roberts', 'Alito', 'Sotomayor', 'Kagan', 'Gorsuch', 'Kavanaugh']


In [69]:
# One similarity matrix per court in sc_obj.j_courts (set_justice must have been called first)
court_sims = sc_obj.sim_matrix(all_justices=False)

In [70]:
# Display the list of per-court similarity matrices; rows/columns follow each court's justice order
court_sims

[array([[1.    , 0.6279, 0.2344, 0.0559, 0.6623, 0.685 , 0.0839, 0.1097,
         0.1336],
        [0.6279, 1.    , 0.1019, 0.0729, 0.7325, 0.7475, 0.0657, 0.0215,
         0.056 ],
        [0.2344, 0.1019, 1.    , 0.1626, 0.1256, 0.115 , 0.1171, 0.268 ,
         0.3996],
        [0.0559, 0.0729, 0.1626, 1.    , 0.0899, 0.0658, 0.7541, 0.6026,
         0.5478],
        [0.6623, 0.7325, 0.1256, 0.0899, 1.    , 0.7117, 0.0971, 0.0847,
         0.092 ],
        [0.685 , 0.7475, 0.115 , 0.0658, 0.7117, 1.    , 0.0889, 0.0581,
         0.0842],
        [0.0839, 0.0657, 0.1171, 0.7541, 0.0971, 0.0889, 1.    , 0.5428,
         0.5578],
        [0.1097, 0.0215, 0.268 , 0.6026, 0.0847, 0.0581, 0.5428, 1.    ,
         0.7012],
        [0.1336, 0.056 , 0.3996, 0.5478, 0.092 , 0.0842, 0.5578, 0.7012,
         1.    ]]),
 array([[1.    , 0.6677, 0.1622, 0.    , 0.6156, 0.1185, 0.1529, 0.1383,
         0.703 ],
        [0.6677, 1.    , 0.343 , 0.    , 0.5423, 0.1252, 0.2425, 0.2194,
         0.686 

In [62]:
# Row vectors (shape 1 x n_cases) for Alito and Thomas over the cases shared
# by every justice on the first court in sc_obj.j_courts.
first_court = sc_obj.j_courts[0]
a, b = (
    np.array(sc_obj.get_court(first_court).loc[name]).reshape(1, -1)
    for name in ('Alito', 'Thomas')
)

In [65]:
# Spot-check: should equal the Thomas/Alito entry of the first matrix in court_sims
np.round(cosine_similarity(a, b), 4)

array([[0.5578]])