ABAG Landuse Calculations for VTA and BART Model.

This notebook documents how data is joined between the VTA or CCAG TAZ zones and MTC's 1454 regions.  VTA and CCAG will be referred to as Regional Partners.

It also details how to convert ABAG numbers to BART numbers.

The inputs are the yearly calculations from MTC, VTA, and CCAG.  The outputs are a CSV file, an Excel file, and a shapefile.

In [None]:
#This uses geopy36
# import geopandas as gpd
import pandas as pd
# %matplotlib inline  
# from shapely.geometry import Point

from simpledbf import Dbf5
# !pip install simpledbf
#https://pypi.python.org/pypi/simpledbf/0.2.4

from data_prep import *
from employment_cleaning import *
from landuse_calcs import *

In [None]:
# Load the MTC TAZ shapefile and join the 2015 ABAG TAZ summaries onto it.
# NOTE(review): `gpd` is not imported explicitly in the import cell (the
# geopandas import there is commented out); presumably it arrives through one
# of the star imports - confirm.
mtc_taz = gpd.read_file('MTC/')

# The ABAG summaries key their zones as 'ZONE'; rename it to 'TAZ1454' before joining.
abag = (
    pd.read_csv('ABAG_03202018/run7224c_taz_summaries_2015.csv')
    .rename(columns={'ZONE': 'TAZ1454'})
)

# No explicit key is given, so the join uses every column name the two frames share.
mtc_taz = pd.merge(mtc_taz, abag)

mtc_taz.head()

In [None]:
# List the column names of mtc_taz.
# HHINCQ1 - presumably the number of households in the 1st income quartile
# (the original note read "number of houses in the 1st quartile") - TODO confirm.
mtc_taz.columns

In [None]:
# NOTE(review): this cell repeats the column listing of the preceding cell.
mtc_taz.columns

In [None]:
# Prep the VTA data
#
# 1. Read the VTA TAZ shapefile.
# 2. Join the zmast13 DBF attributes onto it (the DBF's 'ZONE' column is renamed to 'TAZ').
# 3. Keep the land-use columns of interest and join the VTA-to-MTC correspondence table.

vta_taz = gpd.read_file('VTA_TAZ/')

vta_dbf = (
    Dbf5('2017ABAGLanduseAllocation/VTA/zmast13.dbf')
    .to_dataframe()
    .rename(columns={'ZONE': 'TAZ'})
)
# No explicit key is given, so the join uses every column name the two frames share.
vta_taz = pd.merge(vta_taz, vta_dbf)

#http://analytics.mtc.ca.gov/foswiki/UrbanSimTwo/OutputToTravelModel

# The commented-out code below is kept for provenance: it appears to be how
# 'rel_vta_mtc.csv' was originally produced (a centroid-in-polygon spatial join
# plus three manually added zone pairs).
#
# vta_taz['centroid'] = vta_taz.centroid
# vta_taz = vta_taz.set_geometry('centroid')
# vta_taz['old_geometry'] = vta_taz['geometry']
# vta_taz['geometry'] = vta_taz['centroid']
#
# centroid = gpd.sjoin(mtc_taz[['TAZ1454','geometry']], vta_taz, how = "right", op='contains')
# centroid['TAZ1454'].nunique()
# centroid[['TAZ1454','TAZ']].to_csv("rel.csv")
# rel = centroid[['TAZ1454','TAZ']]
# missing_zones = pd.DataFrame([{'TAZ1454' : 1454, 'TAZ' : 2786},{'TAZ1454':404,'TAZ':980},{'TAZ1454' : 190, 'TAZ' : 1890}])
# rel = pd.concat([rel,missing_zones])
# rel.to_csv('rel_vta_mtc.csv',index=False)

# Load the saved correspondence and discard rows that have no MTC zone.
rel = pd.read_csv('rel_vta_mtc.csv').dropna(subset=['TAZ1454'])

# Columns carried forward from the joined VTA frame.
vta_columns = [
    'TAZ', 'DIST', 'SDIST', 'COUNTY', 'geometry', 'TOTHH', 'TOTPOP',
    'HHPOP', 'EMPRES', 'HH1', 'HH2', 'HH3', 'HH4', 'TACRES', 'RESACRE', 'CIACRE', 'TEMP',
    'RETEMP', 'SEREMP', 'OTHEMP', 'AGEMP', 'MANEMP', 'WHOEMP', 'AGE0004', 'AGE0519',
    'AGE2044', 'AGE4564', 'AGE65', 'SFHH', 'MFHH',
]
vta_taz = pd.merge(vta_taz[vta_columns], rel)

In [None]:
# Prep Data for CCAG
#
# For every projection year this cell:
#   1. reads that year's ABAG TAZ summaries,
#   2. combines them with the CCAG zone master via prep_data(),
#   3. overwrites selected regional-partner columns with ABAG values,
#   4. runs employment_cleaning() and landuse_calcs(),
#   5. writes the full result and a cleaned, column-ordered result to 'output/'.

# CCAG_taz = gpd.read_file('CCAGTAZ/')

# Not sure what the data in the TAZ shapefile relates to, will delete.
# del CCAG_taz['ESENR']
# del CCAG_taz['HSENR']
# CCAG_taz.rename(columns={'CITY':'CITY_NAME'}, inplace=True)
years = ['2015','2017','2020','2023','2025','2030','2035','2040','2043']

dbf = Dbf5('CCAG/Zmast15.dbf')
ccag_dbf = dbf.to_dataframe()
# Taken BEFORE the ZONE -> TAZ rename so these rows keep a 'ZONE' column, which
# is the zone column name used by the cleaned output they are appended to.
ccag_extra_counties = ccag_dbf.query("COUNTY==10|COUNTY==11|COUNTY==12|COUNTY==13")
ccag_dbf = ccag_dbf.rename(columns={'ZONE':'TAZ'})

# CCAG TAZ -> MTC 1454 zone correspondence.
ccag_correspondence = pd.read_excel('CCAG/CCAGTAZ-to-MTC1454-for-Vivek.xlsx', sheet_name='CCAGTAZ-MTC1454-Corresp')
ccag_correspondence = ccag_correspondence.rename(columns={'CCAGTAZ':'TAZ','MTCTAZ1454':'TAZ1454'})
ccag_df = pd.merge(ccag_dbf, ccag_correspondence[['TAZ','TAZ1454']], on='TAZ')

# Left join so that zones without a city correspondence row are kept.
sc_city_corr = pd.read_excel('SSC_TAZ_to_City_Correspondence.xlsx')
sc_city_corr = sc_city_corr.rename(columns={'BART_ZONE':'TAZ'})
ccag_df = pd.merge(ccag_df,sc_city_corr, how = 'left')

# (destination, source) pairs copied only for rows whose COUNTY is neither 2 nor 3.
outside_pairs = [('RP_TOTHH','TOTHH'),('RP_HHPOP','HHPOP'),('RP_TOTPOP','TOTPOP'),
                 ('RP_EMPRES','EMPRES'),
                 ('RP_RESACRE','RESACRE'),('RP_CIACRE','CIACRE'),('RP_TEMP','TOTEMP')]

# (destination, source) pairs copied for every row.
all_row_pairs = [('HH1','HHINCQ1'),('HH2','HHINCQ2'),('HH3','HHINCQ3'),('HH4','HHINCQ4'),
                 ('RP_AGE0004','AGE0004'),('RP_AGE0519','AGE0519'),('RP_AGE2044','AGE2044'),
                 ('RP_AGE4564','AGE4564'),('RP_AGE65','AGE65P')]

# Columns selected for the cleaned output, in output order. 'TAZ1454' is
# selected and then dropped, as in the original workflow.
final_columns = ['TAZ', 'DIST', 'SDIST', 'COUNTY', 'abag_TOTHH_dist', 'abag_HHPOP_dist',
       'abag_TOTPOP_dist', 'abag_EMPRES_dist', 'abag_SFHH_dist', 'abag_MFHH_dist', 'abag_HH1_dist', 'abag_HH2_dist', 'abag_HH3_dist', 'abag_HH4_dist',
       'INC1', 'INC2', 'INC3', 'INC4', 'MHHINC', 'TACRES', 'abag_RESACRE_dist',
       'abag_CIACRE_dist', 'Z2SHARE', 'abag_TEMP_dist', 'abag_RETEMP_dist', 'abag_SEREMP_dist', 'abag_OTHEMP_dist', 'abag_AGEMP_dist',
       'abag_MANEMP_dist', 'abag_WHOEMP_dist', 'abag_AGE0004_dist', 'abag_AGE0519_dist', 'abag_AGE2044_dist', 'abag_AGE4564_dist',
       'abag_AGE65_dist', 'abag_AGE0513_dist', 'abag_AGE1417_dist', 'abag_AGE1824_dist', 'ESENR', 'HSENR',
       'COLLENR', 'COLLENRF', 'COLLENRP','TAZ1454']

for year in years:

    abag = pd.read_csv('ABAG_03202018/run7224c_taz_summaries_' + year + '.csv')
    abag = abag.rename(columns={'ZONE':'TAZ1454'})

    rp_calc = prep_data(abag, ccag_df, geom_given = False)

    # Row labels outside counties 2 and 3; computed once instead of twice per pair.
    outside_idx = rp_calc.query("COUNTY!=3&COUNTY!=2").index
    for dest, src in outside_pairs:
        rp_calc.loc[outside_idx, dest] = rp_calc.loc[outside_idx, src]

    # Kept as a .loc assignment (rather than rp_calc[dest] = ...) so that the
    # assignment semantics for already-existing destination columns are unchanged.
    for dest, src in all_row_pairs:
        rp_calc.loc[rp_calc.index, dest] = rp_calc.loc[rp_calc.index, src]

    rp_calc = employment_cleaning(rp_calc)

    ccag_calcs = landuse_calcs(rp_calc)
    ccag_calcs = ccag_calcs.sort_values(by='TAZ')

    ccag_calcs.to_csv('output/ccag_proj_abag_calcs_' + year + '.csv',index=False)

    # Select the output columns, rename TAZ -> ZONE, strip the 'abag_' prefix and
    # '_dist' suffix from the calculated columns, and drop the MTC zone id.
    ccag_calcs_final = (
        ccag_calcs[final_columns]
        .rename(columns={'TAZ':'ZONE'})
        .rename(columns=lambda x: x.replace('abag_','').replace('_dist',''))
        .drop(columns='TAZ1454')
    )

    col_order = ccag_calcs_final.columns
    # DataFrame.append was removed in pandas 2.0; pd.concat is the replacement.
    # Columns missing from the extra-county rows become NaN and are written as 0 below.
    ccag_calcs_final_appended = pd.concat([ccag_calcs_final, ccag_extra_counties])[col_order]

    ccag_calcs_final_appended.to_csv('output/ccag_calcs_clean' + year + '.csv',na_rep=0,index=False)

In [None]:
#Prep ABAG data
#BART: Update abag projection to new file.
#
# For every projection year this cell:
#   1. picks the BART zone master for that year (a 2015-based CSV for years up
#      to 2030, the ZMAST35 Excel sheet for 2035 onwards),
#   2. combines it with that year's ABAG TAZ summaries via prep_data(),
#   3. overwrites selected regional-partner columns with ABAG values,
#   4. runs employment_cleaning() and landuse_calcs(),
#   5. writes the full result and a cleaned, column-ordered result to 'output/'.

# mtc_taz = gpd.read_file('MTC/')
# mtc_taz = mtc_taz.rename(columns={'TAZ1454':'ZONE'})
years = ['2015','2018','2020','2025','2026','2030','2035','2038','2040']
early_years = ['2015','2018','2020','2025','2026','2030']
late_years = ['2035','2038','2040']

#Prep BART data

# bart_dbf = pd.read_excel('bart_calcs_clean2015_AvdH_corrected.xlsx', sheet_name='Corrected')
# year = '2015'

# This might be inefficient since the relationship seems to be inside the bart_TAZ shapefile.
# bart_taz = gpd.read_file('BART-TAZ/TAZ/')
# bart_crs = bart_taz.crs
# bart_taz = pd.merge(bart_taz[['TAZ','KEY','AREA', 'geometry']],bart_dbf)

# ---- Loop-invariant inputs: read and prepared once rather than once per year ----

sc_city_corr = pd.read_excel('SSC_TAZ_to_City_Correspondence.xlsx')
sc_city_corr = sc_city_corr.rename(columns={'BART_ZONE':'TAZ'})

# Zone master used for the early years.
bart_early_raw = pd.read_csv('bart_calcs_clean2015_AvdH_input.csv')
# Non-inplace rename: renaming a query() result in place risks a SettingWithCopyWarning.
bart_early_extra = bart_early_raw.query("COUNTY==10|COUNTY==11|COUNTY==12|COUNTY==13").rename(columns={'TAZ':'ZONE'})
bart_early = bart_early_raw.rename(columns={'ZONE':'TAZ'})
# The input contains spreadsheet '#REF!' error strings in EMPRES; they are replaced with 1.
bart_early['EMPRES'] = bart_early['EMPRES'].replace(to_replace={'#REF!':'1'}).astype(int)
bart_early = pd.merge(bart_early,sc_city_corr, how = 'left')

# Zone master used for the late years; it is joined to the BART -> MTC zone correspondence.
bart_late_raw = pd.read_excel('BART_2025_2035_clean_landuse_data.xlsx', sheet_name='ZMAST35')
bart_late_extra = bart_late_raw.query("COUNTY==10|COUNTY==11|COUNTY==12|COUNTY==13").rename(columns={'TAZ':'ZONE'})
bart_rel = pd.read_csv('BART_zmast15_EIR_Zone_Corresp.csv')
bart_rel = bart_rel.rename(columns={'BARTZONE':'TAZ', 'MTCTAZ':'TAZ1454'})
bart_late = pd.merge(bart_late_raw.rename(columns={'ZONE':'TAZ'}), bart_rel)
bart_late = pd.merge(bart_late,sc_city_corr, how = 'left')

# Copy over values for the area that is not in the SC county or chosen region to cover up errors in the input files.
outside_pairs = [('RP_TOTHH','TOTHH'),('RP_HHPOP','HHPOP'),('RP_TOTPOP','TOTPOP'),
                 ('RP_EMPRES','EMPRES'),
                 ('RP_RESACRE','RESACRE'),('RP_CIACRE','CIACRE'),('RP_TEMP','TOTEMP')]

# (destination, source) pairs copied for every row.
all_row_pairs = [('HH1','HHINCQ1'),('HH2','HHINCQ2'),('HH3','HHINCQ3'),('HH4','HHINCQ4'),
                 ('RP_AGE0004','AGE0004'),('RP_AGE0519','AGE0519'),('RP_AGE2044','AGE2044'),
                 ('RP_AGE4564','AGE4564'),('RP_AGE65','AGE65P')]

# Columns selected for the cleaned output, in output order. 'TAZ1454' is
# selected and then dropped, as in the original workflow.
final_columns = ['TAZ', 'DIST', 'SDIST', 'COUNTY', 'abag_TOTHH_dist', 'abag_HHPOP_dist',
       'abag_TOTPOP_dist', 'abag_EMPRES_dist', 'abag_SFHH_dist', 'abag_MFHH_dist', 'abag_HH1_dist', 'abag_HH2_dist', 'abag_HH3_dist', 'abag_HH4_dist',
       'INC1', 'INC2', 'INC3', 'INC4', 'MHHINC', 'TACRES', 'abag_RESACRE_dist',
       'abag_CIACRE_dist', 'Z2SHARE', 'abag_TEMP_dist', 'abag_RETEMP_dist', 'abag_SEREMP_dist', 'abag_OTHEMP_dist', 'abag_AGEMP_dist',
       'abag_MANEMP_dist', 'abag_WHOEMP_dist', 'abag_AGE0004_dist', 'abag_AGE0519_dist', 'abag_AGE2044_dist', 'abag_AGE4564_dist',
       'abag_AGE65_dist', 'abag_AGE0513_dist', 'abag_AGE1417_dist', 'abag_AGE1824_dist', 'ESENR', 'HSENR',
       'COLLENR', 'COLLENRF', 'COLLENRP','TAZ1454']

for year in years:

    print(year)
    # A fresh copy per year, in case prep_data() modifies its input frame.
    if year in early_years:
        bart_dbf = bart_early.copy()
        bart_extra_counties = bart_early_extra
    elif year in late_years:
        bart_dbf = bart_late.copy()
        bart_extra_counties = bart_late_extra
    else:
        # Previously an unmatched year silently reused the prior iteration's frame.
        raise ValueError('No BART zone master configured for year ' + year)

    abag = pd.read_csv('ABAG_03202018/run7224c_taz_summaries_' + year + '.csv')
    abag = abag.rename(columns={'ZONE':'TAZ1454'})

    rp_calc = prep_data(abag, bart_dbf, geom_given = False)

    # Row labels outside county 3; computed once instead of twice per pair.
    outside_idx = rp_calc.query("COUNTY!=3").index
    for dest, src in outside_pairs:
        rp_calc.loc[outside_idx, dest] = rp_calc.loc[outside_idx, src]

    # Kept as a .loc assignment (rather than rp_calc[dest] = ...) so that the
    # assignment semantics for already-existing destination columns are unchanged.
    for dest, src in all_row_pairs:
        rp_calc.loc[rp_calc.index, dest] = rp_calc.loc[rp_calc.index, src]

    rp_calc = employment_cleaning(rp_calc)

    bart_calcs = landuse_calcs(rp_calc)
    bart_calcs = bart_calcs.sort_values(by='TAZ')
    # del bart_calcs['geometry']
    bart_calcs.to_csv('output/bart_proj_abag_calcs_' + year + '.csv',index=False)
#     bart_calcs.to_excel('output/bart_proj_abag_calcs_'  + year + '.xlsx' ,index=False)

    # Select the output columns, rename TAZ -> ZONE, strip the 'abag_' prefix and
    # '_dist' suffix from the calculated columns, and drop the MTC zone id.
    # https://www.dataquest.io/blog/pandas-cheat-sheet/
    bart_calcs_final = (
        bart_calcs[final_columns]
        .rename(columns={'TAZ':'ZONE'})
        .rename(columns=lambda x: x.replace('abag_','').replace('_dist',''))
        .drop(columns='TAZ1454')
    )

    col_order = bart_calcs_final.columns
    # DataFrame.append was removed in pandas 2.0; pd.concat is the replacement.
    # Columns missing from the extra-county rows become NaN and are written as 0 below.
    bart_calcs_appended = pd.concat([bart_calcs_final, bart_extra_counties])[col_order]

    bart_calcs_appended.to_csv('output/bart_calcs_clean' + year + '.csv',na_rep=0,index=False)
#     bart_calcs_final.to_excel('output/bart_calcs_clean'  + year + '.xlsx' ,index=False)

In [None]:
# Generate CCAG Calculations
#
# Runs prep_data() and landuse_calcs() for CCAG, then exports the result as a
# shapefile, a CSV and an Excel workbook.
#
# NOTE(review): `ccag_merged` is not defined in any cell shown above this one -
# confirm where it is supposed to come from (a fresh-kernel run would fail here).
# NOTE(review): `abag` here is whatever an earlier cell last assigned (the
# yearly loops overwrite it), while the output names say 2010 - confirm the
# intended ABAG year.
# NOTE(review): the result is stored in a variable called `vta_final` although
# this cell is about CCAG; the VTA cell reuses the same name.

rp_calc = prep_data(abag, ccag_merged, rel)
vta_final = landuse_calcs(rp_calc)

# Build a GeoDataFrame from the result, using the MTC layer's CRS.
geometry = vta_final['geometry']
crs = mtc_taz.crs
geo_df = gpd.GeoDataFrame(vta_final, crs=crs, geometry=geometry)

# Not the last expression of the cell, so this preview is not displayed.
geo_df.head()
geo_df.to_file('abag_2010_ccag_dist')

# Drop the geometry column before writing the tabular outputs.
del vta_final['geometry']
vta_final.to_csv('ccag_final_abag_2010.csv',index=False)
vta_final.to_excel('ccag_final_abag_2010.xlsx',index=False)

In [None]:
# Generate VTA Calculations
#
# Runs prep_data() and landuse_calcs() for the VTA zones, then exports the
# result as a shapefile, a CSV and an Excel workbook.
#
# NOTE(review): `abag` here is whatever an earlier cell last assigned (the
# yearly loops overwrite it), while the output names say 2010 - confirm the
# intended ABAG year.

rp_calc = prep_data(abag, vta_taz)
vta_final = landuse_calcs(rp_calc)

# Build a GeoDataFrame from the result, using the MTC layer's CRS.
geometry = vta_final['geometry']
crs = mtc_taz.crs
geo_df = gpd.GeoDataFrame(vta_final, crs=crs, geometry=geometry)


# Not the last expression of the cell, so this preview is not displayed.
geo_df.head()
geo_df.to_file('abag_2010_vta_dist')

# Drop the geometry column before writing the tabular outputs.
del vta_final['geometry']
vta_final.to_csv('vta_final_abag_2010.csv',index=False)
vta_final.to_excel('vta_final_abag_2010.xlsx',index=False)