In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
import geopandas as gpd
from tqdm import tqdm

%matplotlib inline

In [3]:
os.chdir('/Users/micheleclaibourn/Box Sync/mpc/dataForDemocracy/summer-sandbox/cville_region_collection')
%pwd

'/Users/micheleclaibourn/Box Sync/mpc/dataForDemocracy/summer-sandbox/cville_region_collection'

In [4]:
# read in cville spatial data
cville_sp = {}
spatial_units = ['counties', 'tracts', 'blkgps', 'blocks']
path_to_data = '/data/shape/'
for su in spatial_units:
    cville_sp[su] = gpd.read_file(os.getcwd() + path_to_data + 'cville_{su}.shp'.format(su = su))

# create GEOID in blocks (is GEOID10)    
cville_sp['blocks']['BLKGRPCE'] = cville_sp['blocks'].GEOID10.apply(lambda x: str(x)[11])
cville_sp['blocks']['GEOID'] = cville_sp['blocks'].GEOID10

In [5]:
albemarle_data_path = "./dataraw/nfhl/charlottesville/51003C_20211104"
fluvanna_data_path = "./dataraw/nfhl/fluvanna/51065C_20160325"
greene_data_path = "./dataraw/nfhl/greene/51079C_20210323"
louisa_data_path = "./dataraw/nfhl/louisa/51109C_20200723"
nelson_data_path = "./dataraw/nfhl/nelson/51125C_20100618"

cville_data_paths = [
    albemarle_data_path,
    fluvanna_data_path,
    greene_data_path,
    louisa_data_path,
    nelson_data_path
]

In [6]:
def read_paths(paths):
    return pd.concat([gpd.read_file(path) for path in paths])

paths = [os.path.join(data_path, "S_FLD_HAZ_AR.shp") for data_path in cville_data_paths]
cville_fhl = read_paths(paths)
cville_fhl.head()


Unnamed: 0,DFIRM_ID,VERSION_ID,FLD_AR_ID,STUDY_TYP,FLD_ZONE,ZONE_SUBTY,SFHA_TF,STATIC_BFE,V_DATUM,DEPTH,LEN_UNIT,VELOCITY,VEL_UNIT,AR_REVERT,AR_SUBTRV,BFE_REVERT,DEP_REVERT,DUAL_ZONE,SOURCE_CIT,geometry
0,51003C,1.1.1.0,51003C_1,NP,X,0.2 PCT ANNUAL CHANCE FLOOD HAZARD,F,-9999.0,,-9999.0,,-9999.0,,,,-9999.0,-9999.0,,51003C_FIS1,"POLYGON ((-78.54919 38.12872, -78.54919 38.128..."
1,51003C,1.1.1.0,51003C_2,NP,A,,T,-9999.0,,-9999.0,,-9999.0,,,,-9999.0,-9999.0,,51003C_FIS1,"POLYGON ((-78.70209 37.81493, -78.70225 37.815..."
2,51003C,1.1.1.0,51003C_3,NP,A,,T,-9999.0,,-9999.0,,-9999.0,,,,-9999.0,-9999.0,,51003C_FIS1,"POLYGON ((-78.67918 38.02400, -78.67903 38.024..."
3,51003C,1.1.1.0,51003C_4,NP,AE,,T,-9999.0,,-9999.0,,-9999.0,,,,-9999.0,-9999.0,,51003C_FIS1,"POLYGON ((-78.56976 38.06583, -78.56964 38.065..."
4,51003C,1.1.1.0,51003C_5,NP,A,,T,-9999.0,,-9999.0,,-9999.0,,,,-9999.0,-9999.0,,51003C_FIS1,"POLYGON ((-78.52941 37.99722, -78.52978 37.997..."


In [7]:
# Create new floodzone: FLD_ZONE = A or AE is 1per, ZONE_SUBTY = 0.2 PCT ANNUAL CHANCE FLOOD HAZARD or AREA WITH REDUCED FLOOD RISK DUE TO LEVEE is 2per
flood_def = [
    (cville_fhl['FLD_ZONE'] == 'A') | (cville_fhl['FLD_ZONE'] == 'AE'),
    (cville_fhl['ZONE_SUBTY'] == '0.2 PCT ANNUAL CHANCE FLOOD HAZARD') | (cville_fhl['ZONE_SUBTY'] == 'AREA WITH REDUCED FLOOD RISK DUE TO LEVEE')
]
values = ['1per', '2per']
cville_fhl['floodzones'] = np.select(flood_def, values)
cville_fhl.head()

Unnamed: 0,DFIRM_ID,VERSION_ID,FLD_AR_ID,STUDY_TYP,FLD_ZONE,ZONE_SUBTY,SFHA_TF,STATIC_BFE,V_DATUM,DEPTH,...,VELOCITY,VEL_UNIT,AR_REVERT,AR_SUBTRV,BFE_REVERT,DEP_REVERT,DUAL_ZONE,SOURCE_CIT,geometry,floodzones
0,51003C,1.1.1.0,51003C_1,NP,X,0.2 PCT ANNUAL CHANCE FLOOD HAZARD,F,-9999.0,,-9999.0,...,-9999.0,,,,-9999.0,-9999.0,,51003C_FIS1,"POLYGON ((-78.54919 38.12872, -78.54919 38.128...",2per
1,51003C,1.1.1.0,51003C_2,NP,A,,T,-9999.0,,-9999.0,...,-9999.0,,,,-9999.0,-9999.0,,51003C_FIS1,"POLYGON ((-78.70209 37.81493, -78.70225 37.815...",1per
2,51003C,1.1.1.0,51003C_3,NP,A,,T,-9999.0,,-9999.0,...,-9999.0,,,,-9999.0,-9999.0,,51003C_FIS1,"POLYGON ((-78.67918 38.02400, -78.67903 38.024...",1per
3,51003C,1.1.1.0,51003C_4,NP,AE,,T,-9999.0,,-9999.0,...,-9999.0,,,,-9999.0,-9999.0,,51003C_FIS1,"POLYGON ((-78.56976 38.06583, -78.56964 38.065...",1per
4,51003C,1.1.1.0,51003C_5,NP,A,,T,-9999.0,,-9999.0,...,-9999.0,,,,-9999.0,-9999.0,,51003C_FIS1,"POLYGON ((-78.52941 37.99722, -78.52978 37.997...",1per


In [8]:
def get_area(a):
    # get area of shp in km^2
    return round(a.geometry.to_crs("EPSG:3395").map(lambda p: p.area / 10**6).iloc[0], 6)

def get_intersection_area(a, b):
    """
    Computes the intersection of a and b and returns the intersection area and size of the intersected area
    as a percentage of the total area of a.
    
    Parameters
    ----------
    a : geopandas GeoDataFrame, required
    b : geopandas GeoDataFrame, required
    """
    intersection = gpd.overlay(a, b, how='intersection')
    if len(intersection) == 0:
        return {'area': 0, 'percent': 0}
    
    # get area of intersection in km^2
    intersect_area = get_area(intersection)
    
    # get area of original shp in km^2
    shp_area = get_area(a)
    
    # compute percentage of intersection of shp
    percentage = (intersect_area / shp_area) * 100
    
    return {'area': intersect_area, 'percent': percentage}


In [9]:
def get_perc_fld_zone_in_region(fhl, sp, logging = False):
    """
    Given National Flood Hazard Layer data (fhl) and spatial boundaries (sp) for counties, tracts, block groups, and blocks,
    this method computes the intersected area between the flood zone and specific geographic area to get an estimate of how much
    of that region is in what flood zone. 
    
    Parameters
    ----------
    fhl : GeoPandas GeoDataFrame, required
        National Flood Hazard Layer data.
        
    sp : dict, required
        Python dictionary containing spatial boundaries for counties, tracts, block groups, and blocks.
        Each spatial unit is a key for a GeoPandas GeoDataFrame with the spatial data.
        
    logging : bool, optional (default is False)
        Whether or not print statements should be executed for more information during runtime.
        
    Output
    ------
    GeoPandas GeoDataFrame
        Each row represents one flood zone type for a geographic region containing the information for how much of 
        that specific region was intersected by that flood zone type.
        
    """
    fld_zones = fhl.floodzones.value_counts().index.tolist()
    area_df = pd.DataFrame()
    for zone in fld_zones:
        if logging:
            print('Starting Zone: {zone}'.format(zone=zone))
        zone_df = fhl[fhl.floodzones == zone].dissolve()
        for su in ['counties', 'tracts', 'blkgps', 'blocks']:
            if logging:
                print('Starting Spatial Unit: {su}'.format(su = su))
                
            area_data = {
                'GEOID': [],
                'zone': [],
                'spatial_unit': [],
                'area': [],
                'perc': []
            }
            
            region = sp[su]
            
            if su == 'tracts':
                # get counties that were intersected by this flood zone
                intersected_areas = area_df[(area_df.zone == zone) & (area_df.spatial_unit == "counties") & (area_df.area > 0)]
                intersected_counties = sp['counties'][sp['counties'].GEOID.isin(intersected_areas.GEOID.values)]
                # filter region to only tracts within counties that have been intersected, reduces exploration
                region = region[region.COUNTYFP.isin(intersected_counties.COUNTYFP.values)]
            
            elif su == 'blkgps':
                # get tracts that were intersected by this flood zone
                intersected_areas = area_df[(area_df.zone == zone) & (area_df.spatial_unit == "tracts") & (area_df.area > 0)]
                intersected_tracts = sp['tracts'][sp['tracts'].GEOID.isin(intersected_areas.GEOID.values)]
                # filter region to only blkgps within tracts that have been intersected, reduces exploration
                region = region[region.TRACTCE.isin(intersected_tracts.TRACTCE.values)]
                
            elif su == 'blocks':
                # get tracts that were intersected by this flood zone
                intersected_areas = area_df[(area_df.zone == zone) & (area_df.spatial_unit == "blkgps") & (area_df.area > 0)]
                intersected_blkgps = sp['blkgps'][sp['blkgps'].GEOID.isin(intersected_areas.GEOID.values)]
                # filter region to only blocks that have been intersected, reduces exploration
                region = region[region.BLKGRPCE.isin(intersected_blkgps.BLKGRPCE.values)]
                
            for i in tqdm(range(len(region))):
                shp = region.iloc[[i]]
                stats = get_intersection_area(shp, zone_df)
                
                # append data to area data
                area_data['GEOID'].append(shp.GEOID.iloc[0])
                area_data['zone'].append(zone)
                area_data['spatial_unit'].append(su)
                area_data['area'].append(stats['area'])
                area_data['perc'].append(stats['percent'])
                
            area_df = pd.concat([area_df, pd.DataFrame(area_data)])
            
    return area_df

In [10]:
cville_area_df = get_perc_fld_zone_in_region(cville_fhl, cville_sp, logging = True)

Starting Zone: 0
Starting Spatial Unit: counties


100%|██████████| 6/6 [00:14<00:00,  2.34s/it]


Starting Spatial Unit: tracts


100%|██████████| 50/50 [00:49<00:00,  1.01it/s]


Starting Spatial Unit: blkgps


100%|██████████| 155/155 [02:33<00:00,  1.01it/s]


Starting Spatial Unit: blocks


100%|██████████| 10734/10734 [2:29:30<00:00,  1.20it/s] 


Starting Zone: 1per
Starting Spatial Unit: counties


100%|██████████| 6/6 [00:12<00:00,  2.15s/it]


Starting Spatial Unit: tracts


100%|██████████| 50/50 [00:52<00:00,  1.06s/it]


Starting Spatial Unit: blkgps


100%|██████████| 135/135 [02:16<00:00,  1.01s/it]


Starting Spatial Unit: blocks


100%|██████████| 10734/10734 [2:55:55<00:00,  1.02it/s] 


Starting Zone: 2per
Starting Spatial Unit: counties


100%|██████████| 6/6 [00:00<00:00,  6.40it/s]


Starting Spatial Unit: tracts


100%|██████████| 41/41 [00:04<00:00,  8.90it/s]


Starting Spatial Unit: blkgps


100%|██████████| 115/115 [00:11<00:00, 10.09it/s]


Starting Spatial Unit: blocks


100%|██████████| 10734/10734 [06:57<00:00, 25.71it/s]


In [11]:
pwd

'/Users/micheleclaibourn/Box Sync/mpc/dataForDemocracy/summer-sandbox/cville_region_collection'

In [12]:
for key in ['counties', 'tracts', 'blkgps', 'blocks']:
    cville_area_df[cville_area_df.spatial_unit == key].to_csv('dataraw/nfhl_cville_{key}.csv'.format(key = key))