In [59]:
import pandas as pd
import numpy as np
from sklearn.decomposition import FactorAnalysis

### Read Data

In [91]:
# Load the two raw CSV inputs; both are cleaned and joined in the cells below.
df_population = pd.read_csv(filepath_or_buffer='data/df_voting_population.csv')
df_wealth_edu = pd.read_csv(filepath_or_buffer='data/df_wealth_education.csv')

In [92]:
# Upper-case the state names and keep only the columns used further down
# (the three join keys plus Education and Income).
df_wealth_edu = (
    df_wealth_edu
    .assign(State=lambda frame: frame['State'].str.upper())
    .loc[:, ['Year', 'State', 'Race', 'Education', 'Income']]
)
df_wealth_edu

Unnamed: 0,Year,State,Race,Education,Income
0,2010,CONNECTICUT,White,0.575076,35000.0
1,2010,CONNECTICUT,Black,0.414773,22450.0
2,2010,MAINE,White,0.468618,23700.0
3,2010,MAINE,Black,0.388060,12500.0
4,2010,MASSACHUSETTS,White,0.592768,32500.0
...,...,...,...,...,...
505,2018,ALASKA,Black,0.597826,30000.0
506,2018,HAWAII,White,0.671066,40000.0
507,2018,HAWAII,Black,0.566138,34000.0
508,2018,DISTRICT OF COLUMBIA,White,0.922500,75000.0


In [93]:
# Keep the join keys and the two count columns, rename `year` -> `Year` so the
# key names match df_wealth_edu, and parse the counts into integers.
#
# Written as one non-mutating chain: the previous version sliced the frame and
# then used `rename(inplace=True)` and column assignment on that slice, which
# triggers pandas' SettingWithCopyWarning.
df_population = (
    df_population
    .loc[:, ['year', 'State', 'Race', 'Total population', 'Total voted']]
    .rename(columns={'year': 'Year'})
    .assign(**{
        # Counts are stored as text with thousands separators, e.g. "229,690".
        'Total population': lambda frame: (
            frame['Total population'].str.replace(',', '').astype(int)
        ),
        # A cell that is exactly "-" is counted as zero votes. It is replaced
        # with the string '0' (not the int 0) so the column stays homogeneous
        # until the final cast.
        'Total voted': lambda frame: (
            frame['Total voted'].str.replace(',', '').replace('-', '0').astype(int)
        ),
    })
)
df_population

Unnamed: 0,Year,State,Race,Total population,Total voted
0,2010,US,Total,229690,95987
1,2010,US,White,185804,80554
2,2010,US,Black,27396,11149
3,2010,US,Hispanic,32457,6646
4,2010,ALABAMA,Total,3526,1509
...,...,...,...,...,...
1035,2018,WISCONSIN,Hispanic,232,84
1036,2018,WYOMING,Total,430,220
1037,2018,WYOMING,White,408,214
1038,2018,WYOMING,Black,5,2


In [94]:
# Move the (Year, State, Race) key columns into a MultiIndex so this table can
# be joined to the population table on its index.
df_wealth_edu = df_wealth_edu.set_index(keys=['Year', 'State', 'Race'])
df_wealth_edu

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Education,Income
Year,State,Race,Unnamed: 3_level_1,Unnamed: 4_level_1
2010,CONNECTICUT,White,0.575076,35000.0
2010,CONNECTICUT,Black,0.414773,22450.0
2010,MAINE,White,0.468618,23700.0
2010,MAINE,Black,0.388060,12500.0
2010,MASSACHUSETTS,White,0.592768,32500.0
...,...,...,...,...
2018,ALASKA,Black,0.597826,30000.0
2018,HAWAII,White,0.671066,40000.0
2018,HAWAII,Black,0.566138,34000.0
2018,DISTRICT OF COLUMBIA,White,0.922500,75000.0


In [95]:
# Index the population table by the same (Year, State, Race) keys as the
# wealth/education table so the two can be joined index-to-index.
df_population = df_population.set_index(keys=['Year', 'State', 'Race'])
df_population

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Total population,Total voted
Year,State,Race,Unnamed: 3_level_1,Unnamed: 4_level_1
2010,US,Total,229690,95987
2010,US,White,185804,80554
2010,US,Black,27396,11149
2010,US,Hispanic,32457,6646
2010,ALABAMA,Total,3526,1509
...,...,...,...,...
2018,WISCONSIN,Hispanic,232,84
2018,WYOMING,Total,430,220
2018,WYOMING,White,408,214
2018,WYOMING,Black,5,2


In [96]:
# Inner join on the shared (Year, State, Race) index: only key combinations
# present in BOTH tables are kept; all other rows are dropped.
df = df_population.join(other=df_wealth_edu, how='inner')
df

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Total population,Total voted,Education,Income
Year,State,Race,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2010,ALABAMA,White,2611,1097,0.475073,24100.0
2010,ALABAMA,Black,868,403,0.384688,15000.0
2010,ALASKA,White,385,209,0.594025,35850.0
2010,ALASKA,Black,24,7,0.657895,34400.0
2010,ARIZONA,White,4267,1936,0.563546,26200.0
...,...,...,...,...,...,...
2018,WEST VIRGINIA,Black,49,12,0.397094,16800.0
2018,WISCONSIN,White,3948,2574,0.509030,32000.0
2018,WISCONSIN,Black,264,110,0.427223,18000.0
2018,WYOMING,White,408,214,0.549745,32000.0


### Factor Analysis

In [97]:
# One latent factor is extracted; the random state is pinned so repeated runs
# of the notebook give the same result.
transformer = FactorAnalysis(
    random_state=0,
    n_components=1,
)

In [98]:
# Plain NumPy copy of the joined table's values (one row per index entry,
# one column per DataFrame column), shown for inspection.
arr = df.to_numpy(copy=True)
arr

array([[2.61100000e+03, 1.09700000e+03, 4.75072967e-01, 2.41000000e+04],
       [8.68000000e+02, 4.03000000e+02, 3.84687584e-01, 1.50000000e+04],
       [3.85000000e+02, 2.09000000e+02, 5.94025157e-01, 3.58500000e+04],
       ...,
       [2.64000000e+02, 1.10000000e+02, 4.27222659e-01, 1.80000000e+04],
       [4.08000000e+02, 2.14000000e+02, 5.49744898e-01, 3.20000000e+04],
       [5.00000000e+00, 2.00000000e+00, 5.00000000e-01, 2.70500000e+04]])

In [101]:
# Fit the one-factor model on the four observed indicators only and store the
# resulting factor score as 'Political Power'.
#
# The feature columns are selected explicitly so the cell is safe to re-run:
# fitting on the whole `df` would, on any second execution, feed the previously
# computed 'Political Power' column back in as a fifth input feature.
feature_cols = ['Total population', 'Total voted', 'Education', 'Income']
# fit_transform returns an (n_rows, 1) array for n_components=1; take the single
# column so a 1-D vector is assigned.
df['Political Power'] = transformer.fit_transform(df[feature_cols])[:, 0]

In [102]:
# Display the joined table, which now includes the 'Political Power' column.
df

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Total population,Total voted,Education,Income,Political Power
Year,State,Race,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2010,ALABAMA,White,2611,1097,0.475073,24100.0,-0.115389
2010,ALABAMA,Black,868,403,0.384688,15000.0,-1.305053
2010,ALASKA,White,385,209,0.594025,35850.0,1.416403
2010,ALASKA,Black,24,7,0.657895,34400.0,1.238508
2010,ARIZONA,White,4267,1936,0.563546,26200.0,0.170893
...,...,...,...,...,...,...,...
2018,WEST VIRGINIA,Black,49,12,0.397094,16800.0,-1.071852
2018,WISCONSIN,White,3948,2574,0.509030,32000.0,0.911680
2018,WISCONSIN,Black,264,110,0.427223,18000.0,-0.912222
2018,WYOMING,White,408,214,0.549745,32000.0,0.912956
