ABAG Landuse Calculations

This notebook documents how data are joined between the VTA or CCAG TAZ zones and MTC's 1454 regions.  VTA and CCAG are referred to below as Regional Partners (RP).

The inputs are the yearly calculations from MTC, VTA, and CCAG.  The outputs are a CSV file, an Excel workbook, and a shapefile.

In [1]:
#This uses geopy36
import geopandas as gpd
import pandas as pd
%matplotlib inline  
from shapely.geometry import Point

from simpledbf import Dbf5
#pip install simpledbf
#https://pypi.python.org/pypi/simpledbf/0.2.4

PyTables is not installed. No support for HDF output.


In [2]:
def prep_data(abag, rp_taz, crosswalk=None):
    """Join ABAG TAZ1454 totals to a Regional Partner (RP) zone table.

    Parameters
    ----------
    abag : DataFrame
        ABAG table; must contain 'TAZ1454', 'TOTHH', 'RESACRE', 'CIACRE'
        and 'TOTEMP'.  Any other columns are ignored.
    rp_taz : DataFrame or GeoDataFrame
        RP zone table; must contain every column listed in ``rp_columns``
        below (including 'TAZ' and 'geometry').
    crosswalk : DataFrame, optional
        Lookup that relates 'TAZ1454' to 'TAZ'.  When omitted, the
        notebook-level ``rel`` table is used, so existing two-argument calls
        behave as before.

    Returns
    -------
    DataFrame
        One row per crosswalk pair that has a matching RP zone.  The RP
        columns that would clash with (or correspond to) the ABAG totals are
        prefixed with ``RP_``; the ABAG columns keep their original names.
    """
    if crosswalk is None:
        # Fall back to the table loaded earlier in the notebook.
        crosswalk = rel

    rp_columns = ['TAZ', 'DIST', 'SDIST', 'CITY', 'COUNTY', 'geometry', 'TOTHH', 'TOTPOP',
                  'HHPOP', 'EMPRES', 'HH1', 'HH2', 'HH3', 'HH4', 'TACRES', 'RESACRE', 'CIACRE', 'TEMP',
                  'RETEMP', 'SEREMP', 'OTHEMP', 'AGEMP', 'MANEMP', 'WHOEMP', 'AGE0004', 'AGE0519',
                  'AGE2044', 'AGE4564', 'AGE65', 'SFHH', 'MFHH']
    rp_renames = {"TOTHH": "RP_TOTHH", "TOTPOP": "RP_TOTPOP", "HHPOP": "RP_HHPOP",
                  "EMPRES": "RP_EMPRES", "RESACRE": 'RP_RESACRE', "CIACRE": "RP_CIACRE",
                  "TEMP": "RP_TEMP"}
    rp_zones = rp_taz[rp_columns].rename(columns=rp_renames)

    abag_totals = abag[['TAZ1454', 'TOTHH', 'RESACRE', 'CIACRE', 'TOTEMP']]

    # Neither merge names its key: pandas joins on the column names the two
    # sides share.  The outer join keeps ABAG zones and crosswalk rows that
    # have no partner; the second (inner) join keeps only rows with an RP zone.
    abag_by_rp_zone = pd.merge(abag_totals, crosswalk, how='outer')
    return pd.merge(abag_by_rp_zone, rp_zones)

In [3]:
def landuse_calcs(rp_calc):
    """Distribute ABAG TAZ1454 totals across Regional Partner (RP) zones.

    ``rp_calc`` is the frame produced by ``prep_data``: one row per
    (TAZ1454, TAZ) pair carrying the ABAG totals (TOTHH, RESACRE, CIACRE,
    TOTEMP) and the RP zone values.

    Steps:
      1. For RP_TOTHH, RP_RESACRE, RP_CIACRE and RP_TEMP, compute each RP
         zone's share of its TAZ1454 total (``<column>_share``).
      2. Multiply the matching ABAG total by that share and round, giving the
         ``abag_<X>_dist`` control columns.
      3. Scale every remaining RP category by ``RP value / RP denominator *
         distributed control`` and round.

    Zero denominators are not special-cased; whatever pandas produces for the
    division is carried into the output.

    Returns a new DataFrame: the input columns, the four share columns, then
    the ``abag_*_dist`` columns.
    """
    zone_keys = ['TAZ1454', 'TAZ']

    def _fraction_of_parent(values):
        # Value of each RP zone divided by the total of its TAZ1454 group.
        return values / values.sum()

    allocated = rp_calc
    for rp_column in ('RP_TOTHH', 'RP_RESACRE', 'RP_CIACRE', 'RP_TEMP'):
        share = (
            rp_calc.groupby(zone_keys)[rp_column].sum()
            .groupby(level=0)
            .transform(_fraction_of_parent)
            .reset_index()
            .rename(columns={rp_column: rp_column + '_share'})
        )
        # Joined on the shared key columns (TAZ1454, TAZ).
        allocated = pd.merge(allocated, share)

    # ABAG total x RP share -> distributed control totals.
    for abag_total, share_column, target in (
        ('TOTHH', 'RP_TOTHH_share', 'abag_TOTHH_dist'),
        ('RESACRE', 'RP_RESACRE_share', 'abag_RESACRE_dist'),
        ('CIACRE', 'RP_CIACRE_share', 'abag_CIACRE_dist'),
        ('TOTEMP', 'RP_TEMP_share', 'abag_TEMP_dist'),
    ):
        allocated[target] = (allocated[abag_total] * allocated[share_column]).round()

    # (RP denominator, distributed control, RP numerators), in output order.
    # abag_HHPOP_dist is produced by the first group and consumed by the third.
    derived_groups = (
        ('RP_TOTHH', 'abag_TOTHH_dist',
         ('RP_HHPOP', 'RP_TOTPOP', 'RP_EMPRES', 'HH1', 'HH2', 'HH3', 'HH4')),
        ('RP_TEMP', 'abag_TEMP_dist',
         ('RETEMP', 'SEREMP', 'OTHEMP', 'AGEMP', 'MANEMP', 'WHOEMP')),
        ('RP_HHPOP', 'abag_HHPOP_dist',
         ('AGE0004', 'AGE0519', 'AGE2044', 'AGE4564', 'AGE65')),
        ('RP_TOTHH', 'abag_TOTHH_dist',
         ('SFHH', 'MFHH')),
    )
    for denominator, control, numerators in derived_groups:
        for numerator in numerators:
            # Output names never carry the RP_ prefix.
            label = numerator[3:] if numerator.startswith('RP_') else numerator
            allocated['abag_' + label + '_dist'] = (
                allocated[numerator] / allocated[denominator] * allocated[control]
            ).round()

    return allocated

In [4]:
# Prep ABAG data

# MTC zone shapefile (read from the MTC/ directory).
mtc_taz = gpd.read_file('MTC/')

# ABAG 2010 TAZ summaries.  ZONE is renamed to TAZ1454, presumably so the
# table shares its key name with the MTC layer -- confirm against the shapefile.
abag = pd.read_csv(
    '2017ABAGLanduseAllocation/ABAGPROJ2017/DRAFT/run7224c_taz_summaries_2010.csv'
).rename(columns={'ZONE': 'TAZ1454'})

# No key is given, so pandas joins on every column name the two tables share.
mtc_taz = pd.merge(mtc_taz, abag)

In [5]:
# Preview the first rows of mtc_taz (the MTC zone layer merged with the ABAG table).
mtc_taz.head()

Unnamed: 0,AREALAND,AREAWATR,Cnty,DManParABF,DRETVTAMTC,DRetParAbg,FIPSSTCO,LANDACRE,Man_Abag10,Man_Par10,...,HHPOP,TOTPOP,EMPRES,AGE0004,AGE0519,AGE2044,AGE4564,AGE65P,total_job_spaces,total_residential_units
0,6799198,0,4,,,,6001,1680.118487,,,...,2948.0,2950.0,1771.0,116.0,242.0,783.0,1277.0,532.0,3344.0,1399.0
1,659615,0,4,,,,6001,162.994423,,,...,1872.0,1942.0,1202.0,89.0,173.0,812.0,653.0,215.0,6291.0,909.0
2,1074640,0,4,,,,6001,265.549338,,,...,4959.0,4998.0,3260.0,264.0,415.0,2444.0,1378.0,497.0,2091.0,2595.0
3,696057,0,4,,,,6001,171.999438,,,...,3606.0,3639.0,2241.0,170.0,327.0,1871.0,945.0,326.0,1434.0,1862.0
4,576343,0,4,,,,6001,142.417463,,,...,3276.0,3402.0,1937.0,163.0,410.0,1669.0,777.0,383.0,898.0,1662.0


In [6]:
# Prep the VTA data

vta_taz = gpd.read_file('VTA_TAZ/')

# Zone attributes come from the zmast13 DBF; ZONE is renamed to TAZ before the
# attributes are joined to the shapefile (join is on the shared column names).
dbf = Dbf5('2017ABAGLanduseAllocation/VTA/zmast13.dbf')
vta_dbf = dbf.to_dataframe().rename(columns={'ZONE': 'TAZ'})
vta_taz = pd.merge(vta_taz, vta_dbf)

# Reference: http://analytics.mtc.ca.gov/foswiki/UrbanSimTwo/OutputToTravelModel

# Provenance of rel_vta_mtc.csv (recipe kept for reference, not executed here):
#   1. Replace each VTA zone's geometry with its centroid:
#        vta_taz['centroid'] = vta_taz.centroid
#        vta_taz['old_geometry'] = vta_taz['geometry']
#        vta_taz['geometry'] = vta_taz['centroid']
#   2. Spatially join the centroids to the MTC zones that contain them:
#        centroid = gpd.sjoin(mtc_taz[['TAZ1454','geometry']], vta_taz, how="right", op='contains')
#        rel = centroid[['TAZ1454','TAZ']]
#   3. Append three zone pairs by hand:
#        missing_zones = pd.DataFrame([{'TAZ1454': 1454, 'TAZ': 2786},
#                                      {'TAZ1454': 404, 'TAZ': 980},
#                                      {'TAZ1454': 190, 'TAZ': 1890}])
#        rel = pd.concat([rel, missing_zones])
#   4. rel.to_csv('rel_vta_mtc.csv', index=False)

rel = pd.read_csv('rel_vta_mtc.csv')
# Keep only crosswalk rows that have a TAZ1454 value.
rel = rel[rel['TAZ1454'].notnull()]

# Restrict the VTA table to the columns used downstream, then attach the crosswalk.
vta_taz = pd.merge(
    vta_taz[[
        'TAZ', 'DIST', 'SDIST', 'CITY', 'COUNTY', 'geometry',
        'TOTHH', 'TOTPOP', 'HHPOP', 'EMPRES',
        'HH1', 'HH2', 'HH3', 'HH4',
        'TACRES', 'RESACRE', 'CIACRE',
        'TEMP', 'RETEMP', 'SEREMP', 'OTHEMP', 'AGEMP', 'MANEMP', 'WHOEMP',
        'AGE0004', 'AGE0519', 'AGE2044', 'AGE4564', 'AGE65',
        'SFHH', 'MFHH',
    ]],
    rel,
)

In [7]:
# Prep Data for CCAG

CCAG_taz = gpd.read_file('CCAGTAZ/')

# ESENR and HSENR are removed because it is unclear what the shapefile's values
# for them relate to.  CITY is renamed to CITY_NAME (presumably to keep it apart
# from the CITY column in the DBF -- confirm).
CCAG_taz = (
    CCAG_taz
    .drop(['ESENR', 'HSENR'], axis=1)
    .rename(columns={'CITY': 'CITY_NAME'})
)

dbf = Dbf5('2017ABAGLanduseAllocation/CCAGP2013/zmast13.dbf')
CCAG_dbf = dbf.to_dataframe()

# The shapefile calls the zone id TAZ while the DBF calls it ZONE; the other three
# key columns have the same name on both sides.  ZONE duplicates TAZ after the
# join, so it is dropped.
ccag_merged = pd.merge(
    CCAG_taz,
    CCAG_dbf,
    left_on=['TAZ', 'DIST', 'COUNTY', 'SDIST'],
    right_on=['ZONE', 'DIST', 'COUNTY', 'SDIST'],
).drop('ZONE', axis=1)

In [8]:
# Generate CCAG Calculations

# NOTE(review): prep_data joins through the notebook-level `rel` crosswalk, which
# is read from rel_vta_mtc.csv -- confirm that table is also valid for CCAG zones.
rp_calc = prep_data(abag, ccag_merged)
# The name vta_final is reused by the VTA cell below; here it holds CCAG results.
vta_final = landuse_calcs(rp_calc)

# Rebuild a GeoDataFrame from the result's geometry column so it can be written
# as a shapefile.
# NOTE(review): the CRS is copied from mtc_taz rather than from the CCAG layer and
# nothing is reprojected -- assumes both layers already share that CRS; TODO confirm.
geometry = vta_final['geometry']
crs = mtc_taz.crs
geo_df = gpd.GeoDataFrame(vta_final, crs=crs, geometry=geometry)
geo_df.to_file('abag_2010_ccag_dist')

# The tabular exports are written without the geometry column.
del vta_final['geometry']
vta_final.to_csv('ccag_final_abag_2010.csv',index=False)
vta_final.to_excel('ccag_final_abag_2010.xlsx',index=False)

In [9]:
# Generate VTA Calculations

# prep_data re-selects the RP columns from vta_taz and joins them to the ABAG
# totals through the notebook-level `rel` crosswalk.
rp_calc = prep_data(abag, vta_taz)
vta_final = landuse_calcs(rp_calc)

# Rebuild a GeoDataFrame from the result's geometry column so it can be written
# as a shapefile.
# NOTE(review): the CRS is copied from mtc_taz rather than from the VTA layer and
# nothing is reprojected -- assumes both layers already share that CRS; TODO confirm.
geometry = vta_final['geometry']
crs = mtc_taz.crs
geo_df = gpd.GeoDataFrame(vta_final, crs=crs, geometry=geometry)
geo_df.to_file('abag_2010_vta_dist')

# The tabular exports are written without the geometry column.
del vta_final['geometry']
vta_final.to_csv('vta_final_abag_2010.csv',index=False)
vta_final.to_excel('vta_final_abag_2010.xlsx',index=False)

In [11]:
# Show only the first rows of the VTA result rather than the whole frame.
vta_final.head(10)

Unnamed: 0,TAZ1454,TOTHH,RESACRE,CIACRE,TOTEMP,TAZ,DIST,SDIST,CITY,COUNTY,...,abag_AGEMP_dist,abag_MANEMP_dist,abag_WHOEMP_dist,abag_AGE0004_dist,abag_AGE0519_dist,abag_AGE2044_dist,abag_AGE4564_dist,abag_AGE65_dist,abag_SFHH_dist,abag_MFHH_dist
0,1,43.0,1.000000,15.000000,21892.0,1701,1,1,17,1,...,48.0,361.0,501.0,1.0,0.0,33.0,33.0,10.0,1.0,42.0
1,2,131.0,1.000000,24.000000,33612.0,1702,1,1,17,1,...,72.0,553.0,769.0,13.0,23.0,72.0,82.0,41.0,3.0,128.0
2,3,254.0,1.000000,2.000000,2003.0,1703,1,1,17,1,...,4.0,33.0,46.0,11.0,15.0,183.0,133.0,105.0,6.0,248.0
3,4,131.0,1.000000,18.000000,17831.0,1704,1,1,17,1,...,38.0,294.0,409.0,7.0,0.0,48.0,107.0,71.0,3.0,128.0
4,5,560.0,1.000000,15.000000,13388.0,1705,1,1,17,1,...,29.0,221.0,307.0,17.0,64.0,468.0,340.0,96.0,12.0,548.0
5,6,2162.0,7.000000,8.000000,3417.0,1706,1,1,17,1,...,3.0,24.0,10.0,48.0,456.0,2350.0,609.0,311.0,13.0,2149.0
6,7,3651.0,13.000000,14.000000,11019.0,1707,1,1,17,1,...,2.0,349.0,244.0,121.0,458.0,2640.0,1737.0,843.0,7.0,3644.0
7,8,4463.0,8.330422,15.000000,3949.0,1708,1,1,17,1,...,2.0,52.0,41.0,308.0,698.0,2888.0,3709.0,1796.0,17.0,4446.0
8,9,4932.0,9.275070,47.000000,25650.0,1709,1,1,17,1,...,3.0,733.0,415.0,338.0,862.0,4195.0,2511.0,1104.0,31.0,4901.0
9,10,3207.0,9.342438,35.000000,9072.0,1710,1,1,17,1,...,1.0,482.0,241.0,260.0,573.0,2679.0,1717.0,441.0,77.0,3130.0


In [10]:
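# Scratch (not executed): compare the column names of the VTA and ABAG tables.
# NOTE(review): `abag_2010` and `tz` are not defined anywhere in this notebook;
# the lines below are kept as written.
# vt = vta_taz.columns.values
# ab = abag_2010.columns.values 
# set(ab) - set(tz)
# # set(ab) - set(tz)
# # set(ab).difference(set(tz))
# set(vt).intersection(set(ab))
# set(ab) - set(vt)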