In [1]:
import pandas as pd
import numpy as np 
# Never truncate the column list when displaying wide DataFrames.
pd.set_option('display.max_columns', None)  

## Reading in Data:
### Bills: Bill metadata and computed subject areas
### Bill ideologies: Bill ideology scores computed from subject-specific scores of cosponsors
### Member ideologies: Subject-specific ideology scores for all legislators as computed from their vote records in each subject


In [2]:
# Bill-level table (presumably the bill metadata / subject areas described above).
# NOTE(review): the warning fragment printed under this cell looks like the tail of a
# pandas DtypeWarning (mixed-type columns) — consider passing explicit dtype= or
# low_memory=False once the intended column types are confirmed.
bill_df = pd.read_csv('data_store/dataframes/full_congress_level_df8.csv')

  interactivity=interactivity, compiler=compiler, result=result)


In [3]:
# Load the four per-bill ideology tables in one pass: all-member average,
# the 'd' and 'r' party averages, and the bill-level scores.
all_avg_df, d_avg_df, r_avg_df, bill_avg_df = (
    pd.read_csv(csv_path)
    for csv_path in (
        'data_store/ides_by_bill/all_avg_ides.csv',
        'data_store/ides_by_bill/d_avg_ides.csv',
        'data_store/ides_by_bill/r_avg_ides.csv',
        'data_store/ides_by_bill/updated_bill_ideology_scores.csv',
    )
)

In [4]:
# Keep only bills that have an ideology score for every one of the three labels.
required_ideology_cols = ['major_label_1 ideology',
                          'major_label_2 ideology',
                          'major_label_3 ideology']
bill_avg_df.dropna(subset=required_ideology_cols, axis=0, inplace=True)

In [5]:
# Re-number the rows 0..n-1 after the dropna above removed some of them.
bill_avg_df.index = pd.RangeIndex(len(bill_avg_df))

In [6]:
# Report how many distinct bills each table covers.
for label, frame in (('All: ', all_avg_df),
                     ('Dems: ', d_avg_df),
                     ('Reps: ', r_avg_df),
                     ('Bills: ', bill_avg_df)):
    print(label, frame['bill_id'].nunique())

All:  255449
Dems:  255449
Reps:  255449
Bills:  232625


#### The bill-level table contains fewer unique bills because bill ideology is computed directly from each bill's cosponsors. Member vote data is missing for the 99th and 100th Congresses, so ideology scores could not be computed for bills from those Congresses.

In [8]:
def normed_ide_scores(normal_df, df, new_label_designation):
    """Re-centre a group's per-bill ideology scores on the all-member average.

    Parameters
    ----------
    normal_df : pandas.DataFrame
        Baseline table. Must contain 'bill_id' and the three
        'all_avg_label_N ideology' columns (N = 1, 2, 3).
    df : pandas.DataFrame
        Four-column table: the bill id first, then the three label scores.
        It is NOT modified; a renamed, normalised copy is returned.
    new_label_designation : str
        Prefix for the output score columns (e.g. 'd' gives
        'd_avg_label_1 ideology').

    Returns
    -------
    pandas.DataFrame
        Columns ['bill_id', '<prefix>_avg_label_1 ideology',
        '<prefix>_avg_label_2 ideology', '<prefix>_avg_label_3 ideology'],
        each score minus the baseline score of the same bill. Bills that are
        absent from ``normal_df`` get NaN.
    """
    baseline_cols = ['all_avg_label_1 ideology',
                     'all_avg_label_2 ideology',
                     'all_avg_label_3 ideology']
    new_cols = ['bill_id'] + [
        '{}_avg_label_{} ideology'.format(new_label_designation, i)
        for i in (1, 2, 3)
    ]

    # Work on a copy: mutating the caller's frame made re-running the calling
    # cell subtract the baseline a second time.
    normed = df.copy()
    normed.columns = new_cols

    # Look the baseline up by bill_id instead of relying on both frames sharing
    # the same row order/index (the first row wins if a bill_id is repeated).
    baseline = normal_df.drop_duplicates(subset='bill_id').set_index('bill_id')
    for score_col, baseline_col in zip(new_cols[1:], baseline_cols):
        normed[score_col] = (
            normed[score_col] - normed['bill_id'].map(baseline[baseline_col])
        )
    return normed

In [9]:
def normed_bill_ide_scores(normal_df, df, new_label_designation):
    """Return bill ideology scores expressed relative to the all-member average.

    ``df`` must have exactly five columns (bill id, cosponsor count, three label
    scores). Its columns are renamed IN PLACE to 'bill_id', 'cosponsors' and
    '<prefix> avg_label_N ideology'; the caller's frame keeps those names
    afterwards. The renamed frame is inner-joined to ``normal_df`` on
    'bill_id', the matching 'all_avg_label_N ideology' value is subtracted
    from each score, and the three baseline columns are removed from the result.

    Parameters
    ----------
    normal_df : pandas.DataFrame
        Baseline with 'bill_id' and the three 'all_avg_label_N ideology' columns.
    df : pandas.DataFrame
        Bill-level scores (columns renamed in place, values untouched).
    new_label_designation : str
        Prefix of the output score columns (followed by a space).

    Returns
    -------
    pandas.DataFrame
        The merged, normalised frame; bills missing from either input are dropped.
    """
    label_suffixes = (' avg_label_1 ideology',
                      ' avg_label_2 ideology',
                      ' avg_label_3 ideology')
    score_cols = [new_label_designation + suffix for suffix in label_suffixes]
    baseline_cols = ['all_avg_label_1 ideology',
                     'all_avg_label_2 ideology',
                     'all_avg_label_3 ideology']

    # Deliberately mutates the caller's frame: later cells use the new names.
    df.columns = ['bill_id', 'cosponsors'] + score_cols

    merged = df.merge(normal_df, on='bill_id', how='inner')
    for score_col, baseline_col in zip(score_cols, baseline_cols):
        merged[score_col] -= merged[baseline_col]
    return merged.drop(baseline_cols, axis=1)

In [10]:
def ideological_distance(point1_df, point2_df, point_1_cols, point_2_cols,
                         distance_col_name):
    """Euclidean distance between two three-dimensional ideology points per bill.

    The two frames are left-joined on 'bill_id' (every row of ``point1_df`` is
    kept). The first three names in ``point_1_cols`` and ``point_2_cols`` give
    the coordinates of each point; the distance is stored in a new column
    ``distance_col_name`` and is NaN where any coordinate is missing.

    Returns the merged frame including the new distance column.
    """
    merged = point1_df.merge(point2_df, on='bill_id', how='left')
    axis_pairs = [(point_1_cols[i], point_2_cols[i]) for i in range(3)]
    squared_sum = sum(
        (merged[first] - merged[second]) ** 2 for first, second in axis_pairs
    )
    merged[distance_col_name] = np.sqrt(squared_sum)
    return merged

In [11]:
def bill_id_splitter(df):
    """Split 'bill_id' (e.g. 'hr10-101') into congress, bill number and bill type.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a string column 'bill_id' shaped '<type><number>-<congress>'.
        The frame is NOT modified; a new frame is returned.

    Returns
    -------
    pandas.DataFrame
        Copy of ``df`` with three added columns, sorted by
        ['congress', 'bill_type', 'bill_number'] and re-indexed from 0:
        - 'congress'    : text after the first '-' (kept as a string)
        - 'bill_number' : digits of the part before the first '-', as int
        - 'bill_type'   : 'bill_id' with everything except letters/spaces removed

    Raises
    ------
    ValueError
        If the part before '-' contains no digits (int conversion fails).
    """
    # Copy first so the caller's frame does not silently gain helper columns.
    out = df.copy()

    id_parts = out['bill_id'].str.split('-', n=1)
    out['congress'] = id_parts.str[-1]
    # regex=True is explicit: pandas >= 2.0 treats the pattern as a literal
    # string by default, which would leave the letters in place.
    out['bill_number'] = (
        id_parts.str[0].str.replace(r'\D+', '', regex=True).astype(int)
    )
    out['bill_type'] = out['bill_id'].str.replace(r'[^a-zA-Z ]+', '', regex=True)

    # NOTE(review): 'congress' is a string, so the sort is lexicographic
    # ('101' sorts before '93') — confirm whether numeric order is wanted.
    out = out.sort_values(['congress', 'bill_type', 'bill_number'])
    return out.reset_index(drop=True)

In [12]:
# Express each party's per-bill average ('d' / 'r') relative to the all-member average.
normed_d_scores = normed_ide_scores(all_avg_df, d_avg_df, 'd')
normed_r_scores = normed_ide_scores(all_avg_df, r_avg_df, 'r') 

In [13]:
# Bill scores relative to the all-member average.
# Side effect: normed_bill_ide_scores renames bill_avg_df's columns in place to
# 'bill_id', 'cosponsors', 'bill avg_label_N ideology'; later cells rely on those names.
normed_bill_scores = normed_bill_ide_scores(all_avg_df, bill_avg_df, 'bill')  

In [14]:
# Row count after the inner join to the all-member averages.
len(normed_bill_scores)

232625

In [15]:
# Missing-value count per column.
normed_bill_scores.isnull().sum()

bill_id                      0
cosponsors                   0
bill avg_label_1 ideology    0
bill avg_label_2 ideology    0
bill avg_label_3 ideology    0
dtype: int64

In [16]:
# Preview the first two normalised bill rows.
normed_bill_scores.head(2)

Unnamed: 0,bill_id,cosponsors,bill avg_label_1 ideology,bill avg_label_2 ideology,bill avg_label_3 ideology
0,s2009-93,0,0.109918,1.629064,1.701028
1,s4114-93,7,1.427705,2.111049,1.274009


In [17]:
# Preview the first two normalised 'r' rows.
normed_r_scores.head(2) 

Unnamed: 0,bill_id,r_avg_label_1 ideology,r_avg_label_2 ideology,r_avg_label_3 ideology
0,hr1-93,0.0,0.0,0.94393
1,hr2-93,0.94393,0.94393,0.94393


In [18]:
def _ideology_cols(prefix):
    """Return the three '<prefix>avg_label_N ideology' column names (N = 1..3)."""
    return [prefix + 'avg_label_{} ideology'.format(n) for n in (1, 2, 3)]


# Note the bill columns use a space after the prefix, the others an underscore.
bill_avg_names = _ideology_cols('bill ')
all_avg_names = _ideology_cols('all_')
d_avg_names = _ideology_cols('d_')
r_avg_names = _ideology_cols('r_')

In [19]:
# Distance from each bill's (un-normalised) score to the all-member average.
# bill_avg_df only has the 'bill avg_label_N ideology' columns because
# normed_bill_ide_scores renamed them in place, so that cell must run first.
bill_to_avg = ideological_distance(
    point1_df=bill_avg_df,
    point2_df=all_avg_df,
    point_1_cols=bill_avg_names,
    point_2_cols=all_avg_names,
    distance_col_name='bill_to_all_avg',
)

In [20]:
# Distance from each normalised bill score to the normalised 'd' average.
normed_bill_to_d = ideological_distance(
    point1_df=normed_bill_scores,
    point2_df=normed_d_scores,
    point_1_cols=bill_avg_names,
    point_2_cols=d_avg_names,
    distance_col_name='bill_to_d_avg',
)

In [21]:
# Distance from each normalised bill score to the normalised 'r' average.
normed_bill_to_r = ideological_distance(
    point1_df=normed_bill_scores,
    point2_df=normed_r_scores,
    point_1_cols=bill_avg_names,
    point_2_cols=r_avg_names,
    distance_col_name='bill_to_r_avg',
)

In [22]:
# Per-bill distance between the normalised 'd' and 'r' averages.
d_to_r = ideological_distance(
    point1_df=normed_d_scores,
    point2_df=normed_r_scores,
    point_1_cols=d_avg_names,
    point_2_cols=r_avg_names,
    distance_col_name='avg_party_distance',
)

In [23]:
# Add 'congress', 'bill_number' and 'bill_type' (parsed from bill_id) to every
# distance table and sort each by those keys.
bill_to_avg = bill_id_splitter(bill_to_avg)
normed_bill_to_d = bill_id_splitter(normed_bill_to_d)
normed_bill_to_r = bill_id_splitter(normed_bill_to_r)
d_to_r = bill_id_splitter(d_to_r) 

In [24]:
# Reduce every table to the shared bill keys plus its own distance column.
bill_key_cols = ['bill_id', 'congress', 'bill_number', 'bill_type']

bill_to_avg = bill_to_avg[bill_key_cols + ['bill_to_all_avg']]
normed_bill_to_d = normed_bill_to_d[bill_key_cols + ['bill_to_d_avg']]
normed_bill_to_r = normed_bill_to_r[bill_key_cols + ['bill_to_r_avg']]
d_to_r = d_to_r[bill_key_cols + ['avg_party_distance']]

In [25]:
# Row count of the bill-to-average distance table.
len(bill_to_avg)

232625

In [26]:
# Missing-value count per column.
bill_to_avg.isnull().sum()

bill_id            0
congress           0
bill_number        0
bill_type          0
bill_to_all_avg    0
dtype: int64

In [27]:
# Attach the bill-to-'d' distance (inner join on the four bill keys).
merge_1 = bill_to_avg.merge(
    normed_bill_to_d,
    on=['bill_id', 'congress', 'bill_number', 'bill_type'],
)

In [28]:
# Missing-value count per column after the first merge.
merge_1.isnull().sum() 

bill_id            0
congress           0
bill_number        0
bill_type          0
bill_to_all_avg    0
bill_to_d_avg      0
dtype: int64

In [29]:
# Attach the bill-to-'r' distance (inner join on the four bill keys).
merge_2 = merge_1.merge(
    normed_bill_to_r,
    on=['bill_id', 'congress', 'bill_number', 'bill_type'],
)

In [30]:
# Missing-value count per column after the second merge.
merge_2.isnull().sum() 

bill_id            0
congress           0
bill_number        0
bill_type          0
bill_to_all_avg    0
bill_to_d_avg      0
bill_to_r_avg      0
dtype: int64

In [31]:
# Attach the 'd'-to-'r' distance to complete the per-bill distance table.
ide_distance_df = merge_2.merge(
    d_to_r,
    on=['bill_id', 'congress', 'bill_number', 'bill_type'],
)

In [35]:
# Remove the cosponsor count from the bill score table.
# errors='ignore' keeps this cell re-runnable: without it a second execution
# raises KeyError because 'cosponsors' has already been dropped in place.
bill_avg_df.drop('cosponsors', axis=1, inplace=True, errors='ignore')

In [32]:
# Missing-value count per column in the combined distance table.
ide_distance_df.isnull().sum() 

bill_id               0
congress              0
bill_number           0
bill_type             0
bill_to_all_avg       0
bill_to_d_avg         0
bill_to_r_avg         0
avg_party_distance    0
dtype: int64

In [38]:
# Left-join the bill score columns onto the distance table by bill_id.
# Re-running this cell merges again and produces suffixed duplicate columns.
ide_distance_df = ide_distance_df.merge(bill_avg_df, how='left', on='bill_id')

In [39]:
# Write the combined table to disk (the index is written as the first column).
# NOTE(review): 'ide_' has no extension and looks like a truncated filename —
# confirm the intended output path. The table displayed under this cell cannot
# come from this call (to_csv returns None when given a path), so that output
# is presumably stale.
ide_distance_df.to_csv('ide_')

Unnamed: 0,bill_id,congress,bill_number,bill_type,bill_to_all_avg,bill_to_d_avg,bill_to_r_avg,avg_party_distance,bill avg_label_1 ideology,bill avg_label_2 ideology,bill avg_label_3 ideology
0,hr1-101,101,1,hr,2.143566,5.602550,3.363983,8.898429,0.000000,0.000000,0.000000
1,hr2-101,101,2,hr,1.238659,4.638007,3.084791,7.707234,-0.043475,-0.059530,-0.050532
2,hr3-101,101,3,hr,1.387370,4.425024,2.763239,7.183218,-0.043869,-0.040240,-0.041421
3,hr4-101,101,4,hr,2.004842,4.702746,1.822937,6.444026,0.000000,0.000000,0.000000
4,hr5-101,101,5,hr,1.427031,3.775931,1.826029,5.577704,-0.043484,-0.056371,-0.050378
5,hr6-101,101,6,hr,1.678977,4.388606,2.068440,6.430810,-0.002870,0.005400,0.004651
6,hr7-101,101,7,hr,0.582607,3.618295,3.304172,6.911511,-0.281553,-0.315338,-0.396930
7,hr8-101,101,8,hr,1.445295,4.225800,2.131101,6.322707,0.017480,0.012894,0.021255
8,hr9-101,101,9,hr,0.474441,2.735312,4.344905,7.062334,-1.190595,-0.743925,-0.873399
9,hr10-101,101,10,hr,0.771391,3.257827,2.487275,5.713459,-0.137351,-0.131462,-0.146651
