# Count the number of census blocks per county.

In [3]:
import pandas as pd

# skip the second row, which contains descriptions
sf1 = pd.read_csv('data/DEC_10_SF1_combined.csv', skiprows=[1])

# make the tract, county and state separate
sf1['tract'] = sf1['GEO.display-label'].apply(lambda x: x.split(sep=',')[0])
sf1['county'] = sf1['GEO.display-label'].apply(lambda x: x.split(sep=',')[1])
sf1['state'] = sf1['GEO.display-label'].apply(lambda x: x.split(sep=',')[2])

# compute some derived fields
sf1['pct_rent'] = sf1['H4-D004'] / sf1['H4-D001'] * 100
sf1['pct_black'] = sf1['P3-D003'] / sf1['P3-D001'] * 100
sf1['pct_asian'] = sf1['P3-D005'] / sf1['P3-D001'] * 100
sf1['pct_white'] = sf1['P3-D002'] / sf1['P3-D001'] * 100
sf1['pct_hisp'] = sf1['P4-D003'] / sf1['P4-D001'] * 100
sf1['pct_vacant'] = sf1['H5-D001'] / sf1['H1-D001'] * 100
sf1[:5]

Unnamed: 0,GEO.id,GEO.id2,GEO.display-label,P1-D001,P3-D001,P3-D002,P3-D003,P3-D004,P3-D005,P3-D006,...,H5-D008,tract,county,state,pct_rent,pct_black,pct_asian,pct_white,pct_hisp,pct_vacant
0,1400000US21001970100,21001970100,"Census Tract 9701, Adair County, Kentucky",1727,1727,1683,14,1,0,1,...,60,Census Tract 9701,Adair County,Kentucky,17.411402,0.810654,0.0,97.452229,2.142444,16.794872
1,1400000US21001970200,21001970200,"Census Tract 9702, Adair County, Kentucky",1722,1722,1635,41,5,0,0,...,35,Census Tract 9702,Adair County,Kentucky,19.252874,2.380952,0.0,94.947735,2.61324,25.16129
2,1400000US21001970300,21001970300,"Census Tract 9703, Adair County, Kentucky",3016,3016,2944,6,11,8,0,...,106,Census Tract 9703,Adair County,Kentucky,20.521173,0.198939,0.265252,97.612732,1.856764,15.19337
3,1400000US21001970401,21001970401,"Census Tract 9704.01, Adair County, Kentucky",4070,4070,3716,237,1,16,1,...,109,Census Tract 9704.01,Adair County,Kentucky,37.215909,5.823096,0.39312,91.302211,1.547912,10.795743
4,1400000US21001970402,21001970402,"Census Tract 9704.02, Adair County, Kentucky",4261,4261,3950,180,16,16,3,...,70,Census Tract 9704.02,Adair County,Kentucky,30.911681,4.22436,0.375499,92.701244,1.900962,11.642542


In [4]:
sf2 = sf1["GEO.id"].groupby(sf1["county"]).count().to_frame("count_GUs")
sf2

Unnamed: 0_level_0,count_GUs
county,Unnamed: 1_level_1
Adair County,7
Allen County,6
Anderson County,5
Ballard County,3
Barren County,10
...,...
Wayne County,5
Webster County,4
Whitley County,8
Wolfe County,2


# Calculate total households per county.

In [5]:
sf3 = sf1["H1-D001"].groupby(sf1["county"]).sum().to_frame("HHperCounty")
sf3

Unnamed: 0_level_0,HHperCounty
county,Unnamed: 1_level_1
Adair County,8568
Allen County,9307
Anderson County,9127
Ballard County,3885
Barren County,19188
...,...
Wayne County,10942
Webster County,5936
Whitley County,15166
Wolfe County,3660


# Calculate percent renters by county. (Careful not to calculate the mean percent rental across blocks in a county)

In [69]:
sf_renters = sf1["H4-D004"].groupby(sf1["county"]).sum().to_frame("Renters")
sf_renters

Unnamed: 0_level_0,Renters
county,Unnamed: 1_level_1
Adair County,1860
Allen County,1898
Anderson County,2067
Ballard County,727
Barren County,5449
...,...
Wayne County,2386
Webster County,1245
Whitley County,4121
Wolfe County,876


In [71]:
sf_total = sf1["H4-D001"].groupby(sf1["county"]).sum().to_frame("totalHH")
sf_total

Unnamed: 0_level_0,totalHH
county,Unnamed: 1_level_1
Adair County,7285
Allen County,7848
Anderson County,8369
Ballard County,3397
Barren County,16999
...,...
Wayne County,8646
Webster County,5272
Whitley County,13575
Wolfe County,3065


In [72]:
sf_pct_renters = sf_renters["Renters"]/sf_total["totalHH"]

In [73]:
sf_pct_renters

county
 Adair County       0.255319
 Allen County       0.241845
 Anderson County    0.246983
 Ballard County     0.214012
 Barren County      0.320548
                      ...   
 Wayne County       0.275966
 Webster County     0.236153
 Whitley County     0.303573
 Wolfe County       0.285808
 Woodford County    0.289823
Length: 120, dtype: float64

# Calculate percent vacant by county.

In [114]:
grouped = sf1[["H4-D004","H4-D001"]].groupby(sf1['county']).sum().reset_index()
grouped["pct_vaccant"] = grouped["H4-D004"]/grouped["H4-D001"]
grouped

Unnamed: 0,county,H4-D004,H4-D001,pct_vaccant
0,Adair County,1860,7285,0.255319
1,Allen County,1898,7848,0.241845
2,Anderson County,2067,8369,0.246983
3,Ballard County,727,3397,0.214012
4,Barren County,5449,16999,0.320548
...,...,...,...,...
115,Wayne County,2386,8646,0.275966
116,Webster County,1245,5272,0.236153
117,Whitley County,4121,13575,0.303573
118,Wolfe County,876,3065,0.285808


# Calculate mean, min and max vacancy rate (at the block level) by county.

In [112]:
sf1.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1115 entries, 0 to 1114
Data columns (total 40 columns):
GEO.id               1115 non-null object
GEO.id2              1115 non-null int64
GEO.display-label    1115 non-null object
P1-D001              1115 non-null int64
P3-D001              1115 non-null int64
P3-D002              1115 non-null int64
P3-D003              1115 non-null int64
P3-D004              1115 non-null int64
P3-D005              1115 non-null int64
P3-D006              1115 non-null int64
P3-D007              1115 non-null int64
P3-D008              1115 non-null int64
P4-D001              1115 non-null int64
P4-D002              1115 non-null int64
P4-D003              1115 non-null int64
H1-D001              1115 non-null int64
H3-D001              1115 non-null int64
H3-D002              1115 non-null int64
H3-D003              1115 non-null int64
H4-D001              1115 non-null int64
H4-D002              1115 non-null int64
H4-D003              1115 non-

In [47]:
def mean_vaccancy(arr):
    return  arr.mean()

def max_vaccancy(arr):
    return  arr.max()

def min_vaccancy(arr):
    return  arr.min()

In [119]:
m_min = sf1[["pct_vacant"]].groupby(sf1['county']).min()
m_min

# m_max = sf1["pct_vacant"].groupby(sf1['county']).transform(max_vaccancy)
# # m_max.head(50)

# m_min = sf1["pct_vacant"].groupby(sf1['county']).transform(min_vaccancy)
# m_min.head(50)

Unnamed: 0_level_0,pct_vacant
county,Unnamed: 1_level_1
Adair County,10.795743
Allen County,9.254975
Anderson County,5.492297
Ballard County,11.608392
Barren County,7.457767
...,...
Wayne County,9.452736
Webster County,9.475375
Whitley County,8.378044
Wolfe County,15.848353


In [120]:
m_max = sf1[["pct_vacant"]].groupby(sf1['county']).max()
m_max

Unnamed: 0_level_0,pct_vacant
county,Unnamed: 1_level_1
Adair County,25.161290
Allen County,27.559055
Anderson County,13.452028
Ballard County,14.648603
Barren County,22.975207
...,...
Wayne County,41.228851
Webster County,13.524590
Whitley County,13.972810
Wolfe County,16.577279


In [121]:
m_mean = sf1[["pct_vacant"]].groupby(sf1['county']).mean()
m_mean

Unnamed: 0_level_0,pct_vacant
county,Unnamed: 1_level_1
Adair County,15.868316
Allen County,15.808521
Anderson County,8.927276
Ballard County,12.640382
Barren County,11.444842
...,...
Wayne County,18.476934
Webster County,11.019754
Whitley County,10.906092
Wolfe County,16.212816


# Calculate the 90th percentile of vacancy rate (at the block level) by county.

In [122]:
import numpy as np
def percentile(arr):
    return np.percentile(arr,90)

In [136]:
county_90th = sf1["pct_vacant"].groupby(sf1['county']).transform(percentile)
county_90th

0       20.649714
1       20.649714
2       20.649714
3       20.649714
4       20.649714
          ...    
1110    12.934668
1111    12.934668
1112    12.934668
1113    12.934668
1114    12.934668
Name: pct_vacant, Length: 1115, dtype: float64

In [135]:
sf_merge = pd.merge(sf1,county_90th,left_index=True,right_index=True)
sf_merge

Unnamed: 0,GEO.id,GEO.id2,GEO.display-label,P1-D001,P3-D001,P3-D002,P3-D003,P3-D004,P3-D005,P3-D006,...,tract,county,state,pct_rent,pct_black,pct_asian,pct_white,pct_hisp,pct_vacant_x,pct_vacant_y
0,1400000US21001970100,21001970100,"Census Tract 9701, Adair County, Kentucky",1727,1727,1683,14,1,0,1,...,Census Tract 9701,Adair County,Kentucky,17.411402,0.810654,0.000000,97.452229,2.142444,16.794872,20.649714
1,1400000US21001970200,21001970200,"Census Tract 9702, Adair County, Kentucky",1722,1722,1635,41,5,0,0,...,Census Tract 9702,Adair County,Kentucky,19.252874,2.380952,0.000000,94.947735,2.613240,25.161290,20.649714
2,1400000US21001970300,21001970300,"Census Tract 9703, Adair County, Kentucky",3016,3016,2944,6,11,8,0,...,Census Tract 9703,Adair County,Kentucky,20.521173,0.198939,0.265252,97.612732,1.856764,15.193370,20.649714
3,1400000US21001970401,21001970401,"Census Tract 9704.01, Adair County, Kentucky",4070,4070,3716,237,1,16,1,...,Census Tract 9704.01,Adair County,Kentucky,37.215909,5.823096,0.393120,91.302211,1.547912,10.795743,20.649714
4,1400000US21001970402,21001970402,"Census Tract 9704.02, Adair County, Kentucky",4261,4261,3950,180,16,16,3,...,Census Tract 9704.02,Adair County,Kentucky,30.911681,4.224360,0.375499,92.701244,1.900962,11.642542,20.649714
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1110,1400000US21239050106,21239050106,"Census Tract 501.06, Woodford County, Kentucky",3261,3261,3062,91,6,16,0,...,Census Tract 501.06,Woodford County,Kentucky,19.984627,2.790555,0.490647,93.897577,5.826434,7.795889,12.934668
1111,1400000US21239050107,21239050107,"Census Tract 501.07, Woodford County, Kentucky",3757,3757,3315,184,3,27,0,...,Census Tract 501.07,Woodford County,Kentucky,19.793966,4.897525,0.718659,88.235294,8.357732,5.951557,12.934668
1112,1400000US21239050200,21239050200,"Census Tract 502, Woodford County, Kentucky",3533,3533,3421,29,3,22,1,...,Census Tract 502,Woodford County,Kentucky,14.873646,0.820832,0.622700,96.829890,2.236060,9.889395,12.934668
1113,1400000US21239050300,21239050300,"Census Tract 503, Woodford County, Kentucky",1899,1899,1751,23,3,12,6,...,Census Tract 503,Woodford County,Kentucky,34.898477,1.211164,0.631912,92.206424,6.424434,14.902808,12.934668
