# Combining dataframes from group members into one dataframe

In [1]:
# imports
import pandas as pd
import numpy as np

----

## Jack's Data - Income, Employment

In [2]:
# load Jack's income/employment table; the CSV's unnamed first column is the saved index
jack = pd.read_csv('Jacks_df.csv', index_col='Unnamed: 0')

# build the merge key: lowercase county name followed by ' county'
jack['county'] = jack['county_name'].map(lambda name: f'{name.lower()} county')

# keep a handle on the key column (not referenced again in the visible notebook)
index = jack['county']

# the original name column is no longer needed once the key becomes the index
jack = jack.drop(columns='county_name').set_index('county')

---

## Chloe's Data - Voting Registration and Turnout Numbers

In [3]:
# load Chloe's registration/turnout table, drop fully duplicated rows,
# then index it by county so it lines up with the other frames
voting_nums = (
    pd.read_csv('./data/voting_numbers.csv', index_col='Unnamed: 0')
    .drop_duplicates()
    .set_index('county')
)

In [4]:
# convert columns to numeric
def percent_to_float(percent):
    """
    Converts string percentage to decimal proportion
    percent - string of number ending in "%" to convert to float;
              surrounding whitespace is ignored and a missing value
              (NaN / None) is passed through as NaN
    returns - float proportion, e.g. "45%" -> 0.45
    raises  - TypeError for any other non-string input,
              ValueError if the text is not a number
    """
    # empty cells arrive as NaN/None and cannot be parsed as text
    if pd.isna(percent):
        return np.nan
    if not isinstance(percent, str):
        raise TypeError(f'expected a percentage string, got {type(percent).__name__}')

    text = percent.strip()
    # only strip a real "%" sign - blindly slicing would eat a digit of "45"
    if text.endswith('%'):
        text = text[:-1]
    return float(text) / 100

def convert_columns(data):
    """
    Converts every column of a dataframe to a numeric dtype, in place
    data - dataframe whose text columns hold numbers, possibly with
           thousands separators (","); columns with "perc" in the name
           hold percentage strings and are turned into proportions
    Returns None - `data` itself is modified.
    Columns that are already numeric are left untouched, so running this
    twice on the same dataframe is safe.
    """
    is_numeric = pd.api.types.is_numeric_dtype
    perc_list = [col for col in data.columns if "perc" in col]

    for col in perc_list:
        # already-numeric columns are skipped so a re-run does not try to
        # parse floats as "xx%" strings
        if not is_numeric(data[col]):
            data[col] = data[col].apply(percent_to_float)

    for col in data.columns:
        if not is_numeric(data[col]):
            # only touch real strings; missing cells (NaN) pass through unchanged
            data[col] = data[col].apply(
                lambda x: x.replace(',', '') if isinstance(x, str) else x
            )

    for col in data.columns:
        # convert the whole column at once instead of value by value
        data[col] = pd.to_numeric(data[col])

# coerce Chloe's registration/turnout columns to numeric (modifies voting_nums in place)
convert_columns(data=voting_nums)

In [5]:
# resetting these values - incorrect in table
mcculloch_fixes = {
    '2020_voted_num': 3436,
    # NOTE(review): 5361 is presumably the county's registered-voter count - confirm
    '2020_voted_perc': round(3436/5361, 2),
    # early-vote figures are blanked out rather than replaced
    '2020_early_vote_num': np.nan,
    '2020_early_vote_perc': np.nan,
}
for fix_col, fix_val in mcculloch_fixes.items():
    voting_nums.loc['mcculloch county', fix_col] = fix_val

In [6]:
# left-join the voting numbers onto Jack's counties, matching on the county index
df = pd.merge(jack, voting_nums, how='left', left_index=True, right_index=True)

# check shape: merged column count vs. the sum of the two inputs
# NOTE(review): this only compares column counts; unmatched or duplicated
# counties would not show up here
print(f'Expected Merged Columns: {len(jack.columns) + len(voting_nums.columns)}')
print(f'Merged Columns: {len(df.columns)}')

Expected Merged Columns: 23
Merged Columns: 23


----

## Chloe's Data - Voting Numbers by Party

In [7]:
# load Chloe's party-level file, indexed by county; the index is renamed to
# lowercase 'county' to match the other frames
voting_party_data = (
    pd.read_csv('./data/voting_party_data.csv')
    .set_index('County')
    .rename_axis('county')
)

# text columns -> numeric (modifies voting_party_data in place)
convert_columns(voting_party_data)

# left-join onto the combined frame by county index
df = pd.merge(df, voting_party_data, how='left', left_index=True, right_index=True)

# check shape: merged column count vs. the sum of every input so far
print(
    'Expected Merged Columns:',
    sum(len(part.columns) for part in (jack, voting_nums, voting_party_data)),
)
print('Merged Columns:', len(df.columns))

Expected Merged Columns: 55
Merged Columns: 55


----

## Lucy's Data - Demographics

In [8]:
# load Lucy's demographics table and index it by county
asr = pd.read_csv('./data/asr_12_16_20.csv').set_index('county')

# left-join onto the combined frame by county index
df = pd.merge(df, asr, how='left', left_index=True, right_index=True)

# check shape: merged column count vs. the sum of all four inputs
print(
    'Expected Merged Columns:',
    sum(len(part.columns) for part in (jack, voting_nums, voting_party_data, asr)),
)
print('Merged Columns:', len(df.columns))

Expected Merged Columns: 460
Merged Columns: 460


---

## Export CSV

In [9]:
# write the combined frame to disk; the county index is written as the first column
df.to_csv(path_or_buf='combined_df.csv')