In [1]:
import pandas as pd
import numpy as np
pd.options.mode.chained_assignment = None     # default='warn'

# S0101: age and sex

In [2]:
age_raw = pd.read_csv('./raw/ACS_15_5YR_S0101/ACS_15_5YR_S0101.csv')

# total population, median age, sex ratio (males per 100 females), old-age dependency ratio
cols = ['GEO.id', 'GEO.id2', 'HC01_EST_VC01', 'HC01_EST_VC35', 'HC01_EST_VC36', 'HC01_EST_VC38']
age_data = age_raw[cols]
age_data.rename(index = str, columns = {'HC01_EST_VC01': 'population', 'HC01_EST_VC35': 'median_age',
                                        'HC01_EST_VC36': 'sex_ratio', 'HC01_EST_VC38': 'senior_ratio'}, inplace = True)
age_data.head()

Unnamed: 0,GEO.id,GEO.id2,population,median_age,sex_ratio,senior_ratio
0,1400000US36005000100,36005000100,7703,30.7,1251.4,0.5
1,1400000US36005000200,36005000200,5403,37.3,96.9,21.1
2,1400000US36005000400,36005000400,5915,36.4,95.9,15.0
3,1400000US36005001600,36005001600,5879,38.8,77.0,27.1
4,1400000US36005001900,36005001900,2591,32.5,87.1,4.6


# S2301: employment status

In [3]:
employ_raw = pd.read_csv('./raw/ACS_15_5YR_S2301/ACS_15_5YR_S2301.csv')

# white, black, asian, hispanic, poverty, unemployment
cols = ['GEO.id2', 'HC01_EST_VC15', 'HC01_EST_VC16', 'HC01_EST_VC18', 'HC01_EST_VC19',
        'HC01_EST_VC20', 'HC01_EST_VC23', 'HC01_EST_VC36', 'HC04_EST_VC01']
employ_data = employ_raw[cols]
employ_data.rename(index = str, columns = {'HC01_EST_VC15': 'white',
                                          'HC01_EST_VC16': 'black',
                                          'HC01_EST_VC18': 'asian',
                                          'HC01_EST_VC19': 'pacific',
                                          'HC01_EST_VC20': 'others',
                                          'HC01_EST_VC23': 'hispanic',
                                          'HC01_EST_VC36': 'poverty',
                                          'HC04_EST_VC01': '%unemployment'}, inplace = True)
employ_data.head()

Unnamed: 0,GEO.id2,white,black,asian,pacific,others,hispanic,poverty,%unemployment
0,36005000100,763,4846,122,0,1855,2307,0,
1,36005000200,1869,942,204,0,1106,3184,614,7.7
2,36005000400,1672,1716,38,0,1113,2900,409,9.5
3,36005001600,1764,1690,0,0,1002,2969,664,8.7
4,36005001900,437,698,55,0,725,1044,710,19.2


In [4]:
# merge table
merged = pd.merge(age_data, employ_data, on = 'GEO.id2')

# derive: ethnic fractionalization
# https://scholar.harvard.edu/files/alesina/files/fractionalization.pdf
def get_fract(row):
    if row['population'] == 0:
        return 0   # lowest diversity
    else:
        fract = 1 - (row['white']/row['population'])**2
        fract -= (row['black']/row['population'])**2
        fract -= (row['asian']/row['population'])**2
        fract -= (row['pacific']/row['population'])**2
        fract -= (row['others']/row['population'])**2
        fract -= (row['hispanic']/row['population'])**2
        return fract

merged['fract'] = merged.apply(get_fract, axis=1)
merged.head()

Unnamed: 0,GEO.id,GEO.id2,population,median_age,sex_ratio,senior_ratio,white,black,asian,pacific,others,hispanic,poverty,%unemployment,fract
0,1400000US36005000100,36005000100,7703,30.7,1251.4,0.5,763,4846,122,0,1855,2307,0,,0.446476
1,1400000US36005000200,36005000200,5403,37.3,96.9,21.1,1869,942,204,0,1106,3184,614,7.7,0.459338
2,1400000US36005000400,36005000400,5915,36.4,95.9,15.0,1672,1716,38,0,1113,2900,409,9.5,0.560112
3,1400000US36005001600,36005001600,5879,38.8,77.0,27.1,1764,1690,0,0,1002,2969,664,8.7,0.543242
4,1400000US36005001900,36005001900,2591,32.5,87.1,4.6,437,698,55,0,725,1044,710,19.2,0.657878


In [5]:
# percentage of major races
def get_percentage(df):
    if df['population'] == 0:
        df['%white'] = np.nan
        df['%black'] = np.nan
        df['%asian'] = np.nan
        df['%hispanic'] = np.nan
        df['%poverty'] = np.nan
    else:
        df['%white'] = df['white']/df['population'] * 100
        df['%black'] = df['black']/df['population'] * 100
        df['%asian'] = df['asian']/df['population'] * 100
        df['%hispanic'] = df['hispanic']/df['population'] * 100
        df['%poverty'] = df['poverty']/df['population'] * 100
    return df

merged = merged.apply(get_percentage, axis=1)

merged = merged[['GEO.id', 'GEO.id2', 'population', 'sex_ratio', 'median_age', 'senior_ratio',
                 '%white', '%black', '%asian', '%hispanic', 'fract', '%poverty', '%unemployment']].copy()
merged.head()

Unnamed: 0,GEO.id,GEO.id2,population,sex_ratio,median_age,senior_ratio,%white,%black,%asian,%hispanic,fract,%poverty,%unemployment
0,1400000US36005000100,36005000100,7703,1251.4,30.7,0.5,9.905232,62.910554,1.583799,29.94937,0.446476,0.0,
1,1400000US36005000200,36005000200,5403,96.9,37.3,21.1,34.591893,17.434758,3.77568,58.930224,0.459338,11.364057,7.7
2,1400000US36005000400,36005000400,5915,95.9,36.4,15.0,28.267117,29.010989,0.642434,49.027895,0.560112,6.914624,9.5
3,1400000US36005001600,36005001600,5879,77.0,38.8,27.1,30.005103,28.746385,0.0,50.501786,0.543242,11.294438,8.7
4,1400000US36005001900,36005001900,2591,87.1,32.5,4.6,16.866075,26.939406,2.122733,40.293323,0.657878,27.402547,19.2


# S1901: income

In [6]:
income_raw = pd.read_csv('./raw/ACS_15_5YR_S1901/ACS_15_5YR_S1901.csv')

# median household income, income bins(2~11)
# derive: gini coeff - measure of income inequality
cols = ['GEO.id2', 'HC01_EST_VC01', 'HC01_EST_VC02', 'HC01_EST_VC03', 'HC01_EST_VC04', 'HC01_EST_VC05',
        'HC01_EST_VC06', 'HC01_EST_VC07', 'HC01_EST_VC08', 'HC01_EST_VC09', 'HC01_EST_VC10', 'HC01_EST_VC11',
       'HC01_EST_VC13']
income_data = income_raw[cols]
income_data.rename(index = str, columns = {'HC01_EST_VC01': 'households',
                                           'HC01_EST_VC02': 'less_than_10,000',
                                           'HC01_EST_VC03': '10,000-14,999',
                                           'HC01_EST_VC04': '15,000-24,999',
                                           'HC01_EST_VC05': '25,000-34,999',
                                           'HC01_EST_VC06': '35,000-49,999',
                                           'HC01_EST_VC07': '50,000-74,999',
                                           'HC01_EST_VC08': '75,000-99,999',
                                           'HC01_EST_VC09': '100,000-149,999',
                                           'HC01_EST_VC10': '150,000-199,999',
                                           'HC01_EST_VC11': '200,000_or_more',
                                           'HC01_EST_VC13': 'median_income'}, inplace = True)
income_data.head()

Unnamed: 0,GEO.id2,households,"less_than_10,000","10,000-14,999","15,000-24,999","25,000-34,999","35,000-49,999","50,000-74,999","75,000-99,999","100,000-149,999","150,000-199,999","200,000_or_more",median_income
0,36005000100,0,,,,,,,,,,,
1,36005000200,1379,10.5,1.3,7.7,8.8,5.8,18.3,14.1,25.5,6.2,1.7,72034.0
2,36005000400,1921,8.1,2.2,5.3,5.6,9.0,19.9,20.7,20.1,6.8,2.2,74836.0
3,36005001600,1964,14.9,12.4,11.8,12.6,15.0,16.4,5.8,7.8,0.6,2.7,32312.0
4,36005001900,901,16.4,5.9,6.3,16.0,23.9,14.3,8.3,7.0,1.9,0.0,37936.0


In [7]:
# compute gini coefficient
import matplotlib.pyplot as plt
%matplotlib inline

def get_income_lst(row):
    income_lst = []
    income_lst += [10000] * int(row['households']*row['less_than_10,000']/100)
    income_lst += [12500] * int(row['households']*row['10,000-14,999']/100)
    income_lst += [20000] * int(row['households']*row['15,000-24,999']/100)
    income_lst += [30000] * int(row['households']*row['25,000-34,999']/100)
    income_lst += [42500] * int(row['households']*row['35,000-49,999']/100)
    income_lst += [62500] * int(row['households']*row['50,000-74,999']/100)
    income_lst += [87500] * int(row['households']*row['75,000-99,999']/100)
    income_lst += [125000] * int(row['households']*row['100,000-149,999']/100)
    income_lst += [175000] * int(row['households']*row['150,000-199,999']/100)
    income_lst += [200000] * int(row['households']*row['200,000_or_more']/100)
    return income_lst

def get_gini(row):
    if row['households'] == 0:
        return np.nan
    income_lst = get_income_lst(row)
    cum_wealth = np.cumsum(sorted(np.append(income_lst, 0)))
    
    wealth_ratio = cum_wealth / cum_wealth[-1]
    house_quantile = np.array(range(0,len(cum_wealth))) / np.float(len(cum_wealth) - 1)
    # area under a curve
    B = np.trapz(wealth_ratio, house_quantile)
    
    A = 0.5 - B
    gini = A / (A + B)
    return gini
    
income_data['gini'] = income_data.apply(get_gini, axis=1)
income_data.head()

Unnamed: 0,GEO.id2,households,"less_than_10,000","10,000-14,999","15,000-24,999","25,000-34,999","35,000-49,999","50,000-74,999","75,000-99,999","100,000-149,999","150,000-199,999","200,000_or_more",median_income,gini
0,36005000100,0,,,,,,,,,,,,
1,36005000200,1379,10.5,1.3,7.7,8.8,5.8,18.3,14.1,25.5,6.2,1.7,72034.0,0.362496
2,36005000400,1921,8.1,2.2,5.3,5.6,9.0,19.9,20.7,20.1,6.8,2.2,74836.0,0.339986
3,36005001600,1964,14.9,12.4,11.8,12.6,15.0,16.4,5.8,7.8,0.6,2.7,32312.0,0.45036
4,36005001900,901,16.4,5.9,6.3,16.0,23.9,14.3,8.3,7.0,1.9,0.0,37936.0,0.397164


In [8]:
# merge
income_data = income_data[['GEO.id2', 'median_income', 'gini']].copy()

merged = pd.merge(merged, income_data, on = 'GEO.id2')
merged.head()

Unnamed: 0,GEO.id,GEO.id2,population,sex_ratio,median_age,senior_ratio,%white,%black,%asian,%hispanic,fract,%poverty,%unemployment,median_income,gini
0,1400000US36005000100,36005000100,7703,1251.4,30.7,0.5,9.905232,62.910554,1.583799,29.94937,0.446476,0.0,,,
1,1400000US36005000200,36005000200,5403,96.9,37.3,21.1,34.591893,17.434758,3.77568,58.930224,0.459338,11.364057,7.7,72034.0,0.362496
2,1400000US36005000400,36005000400,5915,95.9,36.4,15.0,28.267117,29.010989,0.642434,49.027895,0.560112,6.914624,9.5,74836.0,0.339986
3,1400000US36005001600,36005001600,5879,77.0,38.8,27.1,30.005103,28.746385,0.0,50.501786,0.543242,11.294438,8.7,32312.0,0.45036
4,1400000US36005001900,36005001900,2591,87.1,32.5,4.6,16.866075,26.939406,2.122733,40.293323,0.657878,27.402547,19.2,37936.0,0.397164


# DP02: social characteristics

In [9]:
social_raw = pd.read_csv('./raw/ACS_15_5YR_DP02/ACS_15_5YR_DP02.csv')

# education: %no_high_school, %higher_bachelor, residential stability: %live_same_house
cols = ['GEO.id2', 'HC03_VC95', 'HC03_VC96','HC03_VC120']
social_data = social_raw[cols]
social_data.rename(index = str, columns = {'HC03_VC95': '%high_school',
                                           'HC03_VC96': '%bachelor',
                                           'HC03_VC120': '%same_house'}, inplace = True)
social_data.head()

Unnamed: 0,GEO.id2,%high_school,%bachelor,%same_house
0,36005000100,35.5,1.3,16.7
1,36005000200,76.6,15.7,94.9
2,36005000400,83.3,23.2,93.0
3,36005001600,70.1,13.3,93.7
4,36005001900,75.6,20.0,78.6


In [10]:
# no high school
social_data['%no_high_school'] = 100 - social_data['%high_school']
social_data = social_data[['GEO.id2', '%no_high_school', '%bachelor', '%same_house']].copy()
social_data.head()

Unnamed: 0,GEO.id2,%no_high_school,%bachelor,%same_house
0,36005000100,64.5,1.3,16.7
1,36005000200,23.4,15.7,94.9
2,36005000400,16.7,23.2,93.0
3,36005001600,29.9,13.3,93.7
4,36005001900,24.4,20.0,78.6


In [11]:
# merge
merged = pd.merge(merged, social_data, on = 'GEO.id2')
merged.head()

Unnamed: 0,GEO.id,GEO.id2,population,sex_ratio,median_age,senior_ratio,%white,%black,%asian,%hispanic,fract,%poverty,%unemployment,median_income,gini,%no_high_school,%bachelor,%same_house
0,1400000US36005000100,36005000100,7703,1251.4,30.7,0.5,9.905232,62.910554,1.583799,29.94937,0.446476,0.0,,,,64.5,1.3,16.7
1,1400000US36005000200,36005000200,5403,96.9,37.3,21.1,34.591893,17.434758,3.77568,58.930224,0.459338,11.364057,7.7,72034.0,0.362496,23.4,15.7,94.9
2,1400000US36005000400,36005000400,5915,95.9,36.4,15.0,28.267117,29.010989,0.642434,49.027895,0.560112,6.914624,9.5,74836.0,0.339986,16.7,23.2,93.0
3,1400000US36005001600,36005001600,5879,77.0,38.8,27.1,30.005103,28.746385,0.0,50.501786,0.543242,11.294438,8.7,32312.0,0.45036,29.9,13.3,93.7
4,1400000US36005001900,36005001900,2591,87.1,32.5,4.6,16.866075,26.939406,2.122733,40.293323,0.657878,27.402547,19.2,37936.0,0.397164,24.4,20.0,78.6


# B25034: year built

In [12]:
year_raw = pd.read_csv('./raw/ACS_15_5YR_B25034/ACS_15_5YR_B25034.csv')

# mean building age
cols = ['GEO.id2', 'HD01_VD01', 'HD01_VD02', 'HD01_VD03', 'HD01_VD04', 'HD01_VD05',
        'HD01_VD06', 'HD01_VD07', 'HD01_VD08', 'HD01_VD09', 'HD01_VD10', 'HD01_VD11']
year_data = year_raw[cols]
year_data.rename(index = str, columns = {'HD01_VD01': 'total',
                                        'HD01_VD02': '2014_newer',
                                        'HD01_VD03': '2010-2013',
                                        'HD01_VD04': '2000-2009',
                                        'HD01_VD05': '1990-1999',
                                        'HD01_VD06': '1980-1989',
                                        'HD01_VD07': '1970-1979',
                                        'HD01_VD08': '1960-1969',
                                        'HD01_VD09': '1950-1959',
                                        'HD01_VD10': '1940-1949',
                                        'HD01_VD11': '1939_earlier'}, inplace = True)
year_data.head()

Unnamed: 0,GEO.id2,total,2014_newer,2010-2013,2000-2009,1990-1999,1980-1989,1970-1979,1960-1969,1950-1959,1940-1949,1939_earlier
0,36005000100,0,0,0,0,0,0,0,0,0,0,0
1,36005000200,1492,0,0,261,63,41,113,245,134,196,439
2,36005000400,2129,0,13,805,333,75,118,178,113,86,408
3,36005001600,2046,0,39,242,142,0,919,441,105,26,132
4,36005001900,947,0,259,159,18,27,0,45,30,24,385


In [13]:
# compute mean structure age
aim = 2015

def get_mean_age(row):
    if row['total'] == 0:
        return 0
    else:
        age_sum = (aim-2014)*row['2014_newer'] + (aim-2012)*row['2010-2013'] + (aim-2005)*row['2000-2009'] + (aim-1995)*row['1990-1999'] + (aim-1985)*row['1980-1989'] + (aim-1975)*row['1970-1979'] + (aim-1965)*row['1960-1969'] + (aim-1955)*row['1950-1959'] + (aim-1945)*row['1940-1949'] + (aim-1940)*row['1939_earlier']
        return age_sum/row['total']

year_data['avg_age'] = year_data.apply(get_mean_age, axis=1)
# replace 0 with column mean
year_data['mean_bldg_age'] = year_data['avg_age'].replace(0, year_data['avg_age'].mean())

year_data = year_data[['GEO.id2', 'mean_bldg_age']].copy()
year_data.head()

Unnamed: 0,GEO.id2,mean_bldg_age
0,36005000100,57.541626
1,36005000200,51.310322
2,36005000400,34.767027
3,36005001600,40.179374
4,36005001900,40.276663


In [14]:
# merge
merged = pd.merge(merged, year_data, on = 'GEO.id2')
merged.head()

Unnamed: 0,GEO.id,GEO.id2,population,sex_ratio,median_age,senior_ratio,%white,%black,%asian,%hispanic,fract,%poverty,%unemployment,median_income,gini,%no_high_school,%bachelor,%same_house,mean_bldg_age
0,1400000US36005000100,36005000100,7703,1251.4,30.7,0.5,9.905232,62.910554,1.583799,29.94937,0.446476,0.0,,,,64.5,1.3,16.7,57.541626
1,1400000US36005000200,36005000200,5403,96.9,37.3,21.1,34.591893,17.434758,3.77568,58.930224,0.459338,11.364057,7.7,72034.0,0.362496,23.4,15.7,94.9,51.310322
2,1400000US36005000400,36005000400,5915,95.9,36.4,15.0,28.267117,29.010989,0.642434,49.027895,0.560112,6.914624,9.5,74836.0,0.339986,16.7,23.2,93.0,34.767027
3,1400000US36005001600,36005001600,5879,77.0,38.8,27.1,30.005103,28.746385,0.0,50.501786,0.543242,11.294438,8.7,32312.0,0.45036,29.9,13.3,93.7,40.179374
4,1400000US36005001900,36005001900,2591,87.1,32.5,4.6,16.866075,26.939406,2.122733,40.293323,0.657878,27.402547,19.2,37936.0,0.397164,24.4,20.0,78.6,40.276663


# B25002: occupancy

In [15]:
occupy_raw = pd.read_csv('./raw/ACS_15_5YR_B25002/ACS_15_5YR_B25002.csv')

# occupancy rate
cols = ['GEO.id2', 'HD01_VD01', 'HD01_VD02']
occupy_data = occupy_raw[cols]
occupy_data.rename(index = str, columns = {'HD01_VD01': 'total', 'HD01_VD02': 'occupied'}, inplace = True)
occupy_data.head()

Unnamed: 0,GEO.id2,total,occupied
0,36005000100,0,0
1,36005000200,1492,1379
2,36005000400,2129,1921
3,36005001600,2046,1964
4,36005001900,947,901


In [16]:
# replace 0 with column mean
occupy_data['total_nonzero'] = occupy_data['total'].replace(0, occupy_data['total'].mean())
occupy_data['occupied_nonzero'] = occupy_data['occupied'].replace(0, occupy_data['occupied'].mean())
# calculate percentage
occupy_data['%occupancy'] = occupy_data['occupied_nonzero']/occupy_data['total_nonzero'] * 100

occupy_data = occupy_data[['GEO.id2', '%occupancy']].copy()
occupy_data.head()

Unnamed: 0,GEO.id2,%occupancy
0,36005000100,90.979845
1,36005000200,92.426273
2,36005000400,90.230155
3,36005001600,95.99218
4,36005001900,95.142555


In [17]:
# merge
merged = pd.merge(merged, occupy_data, on = 'GEO.id2')
merged.head()

Unnamed: 0,GEO.id,GEO.id2,population,sex_ratio,median_age,senior_ratio,%white,%black,%asian,%hispanic,fract,%poverty,%unemployment,median_income,gini,%no_high_school,%bachelor,%same_house,mean_bldg_age,%occupancy
0,1400000US36005000100,36005000100,7703,1251.4,30.7,0.5,9.905232,62.910554,1.583799,29.94937,0.446476,0.0,,,,64.5,1.3,16.7,57.541626,90.979845
1,1400000US36005000200,36005000200,5403,96.9,37.3,21.1,34.591893,17.434758,3.77568,58.930224,0.459338,11.364057,7.7,72034.0,0.362496,23.4,15.7,94.9,51.310322,92.426273
2,1400000US36005000400,36005000400,5915,95.9,36.4,15.0,28.267117,29.010989,0.642434,49.027895,0.560112,6.914624,9.5,74836.0,0.339986,16.7,23.2,93.0,34.767027,90.230155
3,1400000US36005001600,36005001600,5879,77.0,38.8,27.1,30.005103,28.746385,0.0,50.501786,0.543242,11.294438,8.7,32312.0,0.45036,29.9,13.3,93.7,40.179374,95.99218
4,1400000US36005001900,36005001900,2591,87.1,32.5,4.6,16.866075,26.939406,2.122733,40.293323,0.657878,27.402547,19.2,37936.0,0.397164,24.4,20.0,78.6,40.276663,95.142555


# B25003: ownership

In [18]:
owner_raw = pd.read_csv('./raw/ACS_15_5YR_B25003/ACS_15_5YR_B25003.csv')

# ownership rate
cols = ['GEO.id2', 'HD01_VD01', 'HD01_VD02']
owner_data = owner_raw[cols]
owner_data.rename(index = str, columns = {'HD01_VD01': 'total', 'HD01_VD02': 'owner_occupied'}, inplace = True)

owner_data.head()

Unnamed: 0,GEO.id2,total,owner_occupied
0,36005000100,0,0
1,36005000200,1379,696
2,36005000400,1921,1403
3,36005001600,1964,346
4,36005001900,901,85


In [19]:
# replace 0 with column mean
owner_data['total_nonzero'] = owner_data['total'].replace(0, owner_data['total'].mean())
owner_data['owner_nonzero'] = owner_data['owner_occupied'].replace(0, owner_data['owner_occupied'].mean())
# calculate percentage
owner_data['%ownership'] = owner_data['owner_nonzero']/owner_data['total_nonzero'] * 100

owner_data = owner_data[['GEO.id2', '%ownership']].copy()
owner_data.head()

Unnamed: 0,GEO.id2,%ownership
0,36005000100,31.840015
1,36005000200,50.471356
2,36005000400,73.034878
3,36005001600,17.617108
4,36005001900,9.433962


In [20]:
# merge
merged = pd.merge(merged, owner_data, on = 'GEO.id2')
merged.head()

Unnamed: 0,GEO.id,GEO.id2,population,sex_ratio,median_age,senior_ratio,%white,%black,%asian,%hispanic,...,%poverty,%unemployment,median_income,gini,%no_high_school,%bachelor,%same_house,mean_bldg_age,%occupancy,%ownership
0,1400000US36005000100,36005000100,7703,1251.4,30.7,0.5,9.905232,62.910554,1.583799,29.94937,...,0.0,,,,64.5,1.3,16.7,57.541626,90.979845,31.840015
1,1400000US36005000200,36005000200,5403,96.9,37.3,21.1,34.591893,17.434758,3.77568,58.930224,...,11.364057,7.7,72034.0,0.362496,23.4,15.7,94.9,51.310322,92.426273,50.471356
2,1400000US36005000400,36005000400,5915,95.9,36.4,15.0,28.267117,29.010989,0.642434,49.027895,...,6.914624,9.5,74836.0,0.339986,16.7,23.2,93.0,34.767027,90.230155,73.034878
3,1400000US36005001600,36005001600,5879,77.0,38.8,27.1,30.005103,28.746385,0.0,50.501786,...,11.294438,8.7,32312.0,0.45036,29.9,13.3,93.7,40.179374,95.99218,17.617108
4,1400000US36005001900,36005001900,2591,87.1,32.5,4.6,16.866075,26.939406,2.122733,40.293323,...,27.402547,19.2,37936.0,0.397164,24.4,20.0,78.6,40.276663,95.142555,9.433962


In [21]:
merged

Unnamed: 0,GEO.id,GEO.id2,population,sex_ratio,median_age,senior_ratio,%white,%black,%asian,%hispanic,...,%poverty,%unemployment,median_income,gini,%no_high_school,%bachelor,%same_house,mean_bldg_age,%occupancy,%ownership
0,1400000US36005000100,36005000100,7703,1251.4,30.7,0.5,9.905232,62.910554,1.583799,29.949370,...,0.000000,,,,64.5,1.3,16.7,57.541626,90.979845,31.840015
1,1400000US36005000200,36005000200,5403,96.9,37.3,21.1,34.591893,17.434758,3.775680,58.930224,...,11.364057,7.7,72034.0,0.362496,23.4,15.7,94.9,51.310322,92.426273,50.471356
2,1400000US36005000400,36005000400,5915,95.9,36.4,15.0,28.267117,29.010989,0.642434,49.027895,...,6.914624,9.5,74836.0,0.339986,16.7,23.2,93.0,34.767027,90.230155,73.034878
3,1400000US36005001600,36005001600,5879,77.0,38.8,27.1,30.005103,28.746385,0.000000,50.501786,...,11.294438,8.7,32312.0,0.450360,29.9,13.3,93.7,40.179374,95.992180,17.617108
4,1400000US36005001900,36005001900,2591,87.1,32.5,4.6,16.866075,26.939406,2.122733,40.293323,...,27.402547,19.2,37936.0,0.397164,24.4,20.0,78.6,40.276663,95.142555,9.433962
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2162,1400000US36085030302,36085030302,6279,97.1,36.7,16.0,38.445612,17.614270,12.056060,24.701386,...,9.237140,6.2,76542.0,0.370440,13.8,30.5,94.3,34.121688,95.142296,70.603404
2163,1400000US36085031901,36085031901,2550,59.7,30.2,25.5,15.058824,50.627451,5.764706,17.960784,...,18.313725,13.8,25064.0,0.418395,30.8,15.8,95.7,55.417152,84.679434,17.994100
2164,1400000US36085031902,36085031902,4611,79.6,29.1,13.3,21.687270,41.335936,0.910865,14.747343,...,19.778790,9.4,22656.0,0.512350,18.6,20.8,88.7,48.277954,87.500000,39.602170
2165,1400000US36085032300,36085032300,1131,111.8,41.9,11.9,40.671972,24.933687,0.000000,34.305924,...,16.887710,10.7,56406.0,0.451848,21.7,19.0,94.2,26.389961,95.559846,67.676768


In [22]:
merged.to_csv('./static/demo15.csv')