In [2]:
import pandas as pd
import numpy as np
import scipy.stats as stats
from matplotlib import pyplot as plt

## Dataset for Political Power

In [85]:
# combining the data in each year into one data frame
years = [2010, 2012, 2014, 2016, 2018]
df_political = pd.DataFrame()

for y in years:
    df = pd.read_csv('data/table04b_{}.csv'.format(y), skiprows=3)
    df = df[0:572]
    # unify the column name of the data frame
    columns_name = ['State', 'Race', 'Total population', 'Total citizen population',
               'Total registered', 'Percent registered (Total)', 'Margin of error 1', 'Percent registered (Citizen)',
               'Margin of error 1', 'Total voted', 'Percent voted (Total)', 'Margin of error 1', 'Percent voted (Citizen)',
               'Margin of error 1']
    columns_mapping = {}
    for i in range(len(columns_name)):
        columns_mapping[df.columns[i]] = columns_name[i]
    df = df.rename(columns=columns_mapping)
    df = df[columns_name]
    # change NaN in the row
    for i in range(df.shape[0]):
        if not pd.isna(df['State'][i]):
            state_name = df.loc[i]['State']
        else:
            df.loc[i]['State'] = state_name
    # for the column 'Race', we only need data that is 'total', 'black', 'white', 'hispanic'
    df = df.loc[(df['Race'] == 'Total') | (df['Race'] == 'White alone') | (df['Race'] == 'Black alone') | (df['Race'] == 'Hispanic (of any race)')]
    df.loc[df['Race'] == 'White alone', 'Race'] = 'White'
    df.loc[df['Race'] == 'Black alone', 'Race'] = 'Black'
    df.loc[df['Race'] == 'Hispanic (of any race)', 'Race'] = 'Hispanic'
    # add year to the dataframe
    df.insert(loc=0, column='year', value=[y for _ in range(df.shape[0])])
    # combine the dataframe in each year
    df_political = pd.concat([df_political, df])
df_political[:30]

Unnamed: 0,year,State,Race,Total population,Total citizen population,Total registered,Percent registered (Total),Margin of error 1,Margin of error 1.1,Margin of error 1.2,...,Percent voted (Total),Margin of error 1.3,Margin of error 1.4,Margin of error 1.5,Margin of error 1.6,Percent voted (Citizen),Margin of error 1.7,Margin of error 1.8,Margin of error 1.9,Margin of error 1.10
0,2010,US,Total,229690,210800,137263,59.8,0.3,0.3,0.3,...,41.8,0.3,0.3,0.3,0.3,45.5,0.3,0.3,0.3,0.3
3,2010,US,White,185804,172447,114482,61.6,0.3,0.3,0.3,...,43.4,0.3,0.3,0.3,0.3,46.7,0.3,0.3,0.3,0.3
5,2010,US,Black,27396,25632,16101,58.8,1,1,1,...,40.7,1,1,1,1.1,43.5,1,1,1,1.1
7,2010,US,Hispanic,32457,21285,10982,33.8,1.2,1.5,1,...,20.5,1.2,1.5,1,1.4,31.2,1.2,1.5,1,1.4
11,2010,ALABAMA,Total,3526,3420,2224,63.1,2.4,2.4,2.5,...,42.8,2.4,2.4,2.5,2.5,44.1,2.4,2.4,2.5,2.5
14,2010,ALABAMA,White,2611,2521,1612,61.7,2.8,2.8,2.8,...,42,2.8,2.8,2.8,2.9,43.5,2.8,2.8,2.8,2.9
16,2010,ALABAMA,Black,868,868,599,69,5.6,5.6,6,...,46.4,5.6,5.6,6,6,46.4,5.6,5.6,6,6
18,2010,ALABAMA,Hispanic,142,65,34,23.6,16.5,28.6,11.7,...,10.2,16.5,28.6,11.7,23.8,22.1,16.5,28.6,11.7,23.8
22,2010,ALASKA,Total,498,478,317,63.8,2.5,2.6,2.6,...,48.6,2.5,2.6,2.6,2.7,50.7,2.5,2.6,2.6,2.7
25,2010,ALASKA,White,385,375,262,68.2,2.8,2.8,3,...,54.3,2.8,2.8,3,3,55.7,2.8,2.8,3,3
