In [1]:
import numpy as np
import geopandas as gpd
import pandas as pd
import matplotlib.pyplot as plt
import os
from tqdm import tqdm

%matplotlib inline

os.chdir('/Users/chasedawson/dev/uva_equity_center/summer-sandbox/nfhl')
os.getcwd()

'/Users/chasedawson/dev/uva_equity_center/summer-sandbox/nfhl'

In [2]:
# read in eastern shore clipped coast data
eastshore_sp = {}
spatial_units = ['counties', 'tracts', 'blkgps', 'blocks']
path_to_data = '/../spatial_units/data/clipped_coast/'
for su in spatial_units:
    eastshore_sp[su] = gpd.read_file(os.getcwd() + path_to_data + 'eastshore_{su}_clipped_coast.shp'.format(su = su))

In [3]:
accomack_data_path = "./data/accomack/51001C_20150617"
northampton_data_path = "./data/northampton/51131C_20150302"

eastshore_data_paths = [
    accomack_data_path,
    northampton_data_path
]

def read_paths(paths):
    return pd.concat([gpd.read_file(path) for path in paths])

paths = [os.path.join(data_path, "S_Fld_Haz_Ar.shp") for data_path in eastshore_data_paths]
eastshore_fhl = read_paths(paths)
eastshore_fhl.head()

Unnamed: 0,DFIRM_ID,VERSION_ID,FLD_AR_ID,STUDY_TYP,FLD_ZONE,ZONE_SUBTY,SFHA_TF,STATIC_BFE,V_DATUM,DEPTH,LEN_UNIT,VELOCITY,VEL_UNIT,AR_REVERT,AR_SUBTRV,BFE_REVERT,DEP_REVERT,DUAL_ZONE,SOURCE_CIT,geometry
0,51001C,1.1.1.0,51001C_51001C_491,NP,X,AREA OF MINIMAL FLOOD HAZARD,F,-9999.0,,-9999.0,,-9999.0,,,,-9999.0,-9999.0,,51001C_51001C_STUDY2,"POLYGON ((-75.35198 37.92819, -75.35207 37.928..."
1,51001C,1.1.1.0,51001C_51001C_492,NP,AE,,T,10.0,NAVD88,-9999.0,Feet,-9999.0,,,,-9999.0,-9999.0,,51001C_51001C_STUDY2,"POLYGON ((-75.64995 37.92998, -75.64994 37.930..."
2,51001C,1.1.1.0,51001C_51001C_488,NP,X,AREA OF MINIMAL FLOOD HAZARD,F,-9999.0,,-9999.0,,-9999.0,,,,-9999.0,-9999.0,,51001C_51001C_STUDY2,"POLYGON ((-75.34462 37.92899, -75.34462 37.929..."
3,51001C,1.1.1.0,51001C_51001C_489,NP,X,0.2 PCT ANNUAL CHANCE FLOOD HAZARD,F,-9999.0,,-9999.0,,-9999.0,,,,-9999.0,-9999.0,,51001C_51001C_STUDY2,"POLYGON ((-75.38406 37.93054, -75.38406 37.930..."
4,51001C,1.1.1.0,51001C_51001C_490,NP,AE,,T,5.0,NAVD88,-9999.0,Feet,-9999.0,,,,-9999.0,-9999.0,,51001C_51001C_STUDY2,"POLYGON ((-75.37010 37.92721, -75.37037 37.927..."


In [6]:
eastshore_sp['blocks']['BLKGRPCE'] = eastshore_sp['blocks'].GEOID.apply(lambda x: str(x)[11])

In [7]:
def get_area(a):
    # get area of shp in km^2
    return round(a.geometry.to_crs("EPSG:3395").map(lambda p: p.area / 10**6).iloc[0], 6)

def get_intersection_area(a, b):
    """
    Computes the intersection of a and b and returns the intersection area and size of the intersected area
    as a percentage of the total area of a.
    
    Parameters
    ----------
    a : geopandas GeoDataFrame, required
    b : geopandas GeoDataFrame, required
    """
    intersection = gpd.overlay(a, b, how='intersection')
    if len(intersection) == 0:
        return {'area': 0, 'percent': 0}
    
    # get area of intersection in km^2
    intersect_area = get_area(intersection)
    
    # get area of original shp in km^2
    shp_area = get_area(a)
    
    # compute percentage of intersection of shp
    percentage = (intersect_area / shp_area) * 100
    
    return {'area': intersect_area, 'percent': percentage}

In [16]:
def get_perc_fld_zone_in_region(fhl, sp, logging = False):
    """
    Given National Flood Hazard Layer data (fhl) and spatial boundaries (sp) for counties, tracts, block groups, and blocks,
    this method computes the intersected area between the flood zone and specific geographic area to get an estimate of how much
    of that region is in what flood zone. 
    
    Parameters
    ----------
    fhl : GeoPandas GeoDataFrame, required
        National Flood Hazard Layer data.
        
    sp : dict, required
        Python dictionary containing spatial boundaries for counties, tracts, block groups, and blocks.
        Each spatial unit is a key for a GeoPandas GeoDataFrame with the spatial data.
        
    logging : bool, optional (default is False)
        Whether or not print statements should be executed for more information during runtime.
        
    Output
    ------
    GeoPandas GeoDataFrame
        Each row represents one flood zone type for a geographic region containing the information for how much of 
        that specific region was intersected by that flood zone type.
        
    """
    fld_zones = fhl.ZONE_SUBTY.value_counts().index.tolist()
    area_df = pd.DataFrame()
    for zone in fld_zones:
        if logging:
            print('Starting Zone: {zone}'.format(zone=zone))
        zone_df = fhl[fhl.ZONE_SUBTY == zone].dissolve()
        for su in ['counties', 'tracts', 'blkgps', 'blocks']:
            if logging:
                print('Starting Spatial Unit: {su}'.format(su = su))
                
            area_data = {
                'GEOID': [],
                'zone': [],
                'spatial_unit': [],
                'area': [],
                'perc': []
            }
            
            region = sp[su]
            
            if su == 'tracts':
                # get counties that were intersected by this flood zone
                intersected_areas = area_df[(area_df.zone == zone) & (area_df.spatial_unit == "counties") & (area_df.area > 0)]
                intersected_counties = sp['counties'][sp['counties'].GEOID.isin(intersected_areas.GEOID.values)]
                # filter region to only tracts within counties that have been intersected, reduces exploration
                region = region[region.COUNTYFP.isin(intersected_counties.COUNTYFP.values)]
            
            elif su == 'blkgps':
                # get tracts that were intersected by this flood zone
                intersected_areas = area_df[(area_df.zone == zone) & (area_df.spatial_unit == "tracts") & (area_df.area > 0)]
                intersected_tracts = sp['tracts'][sp['tracts'].GEOID.isin(intersected_areas.GEOID.values)]
                # filter region to only blkgps within tracts that have been intersected, reduces exploration
                region = region[region.TRACTCE.isin(intersected_tracts.TRACTCE.values)]
                
            elif su == 'blocks':
                # get tracts that were intersected by this flood zone
                intersected_areas = area_df[(area_df.zone == zone) & (area_df.spatial_unit == "blkgps") & (area_df.area > 0)]
                intersected_blkgps = sp['blkgps'][sp['blkgps'].GEOID.isin(intersected_areas.GEOID.values)]
                # filter region to only blocks that have been intersected, reduces exploration
                region = region[region.BLKGRPCE.isin(intersected_blkgps.BLKGRPCE.values)]
                
            for i in tqdm(range(len(region))):
                shp = region.iloc[[i]]
                stats = get_intersection_area(shp, zone_df)
                
                # append data to area data
                area_data['GEOID'].append(shp.GEOID.iloc[0])
                area_data['zone'].append(zone)
                area_data['spatial_unit'].append(su)
                area_data['area'].append(stats['area'])
                area_data['perc'].append(stats['percent'])
                
            area_df = pd.concat([area_df, pd.DataFrame(area_data)])
            
    return area_df

In [17]:
eastshore_area_df = get_perc_fld_zone_in_region(eastshore_fhl, eastshore_sp, logging = True)


Starting Zone: 0.2 PCT ANNUAL CHANCE FLOOD HAZARD


  0%|          | 0/2 [00:00<?, ?it/s]

Starting Spatial Unit: counties


100%|██████████| 2/2 [00:10<00:00,  5.08s/it]
  0%|          | 0/16 [00:00<?, ?it/s]

Starting Spatial Unit: tracts


100%|██████████| 16/16 [00:17<00:00,  1.08s/it]
  0%|          | 0/43 [00:00<?, ?it/s]

Starting Spatial Unit: blkgps


100%|██████████| 43/43 [00:32<00:00,  1.31it/s]
  0%|          | 0/5526 [00:00<?, ?it/s]

Starting Spatial Unit: blocks


100%|██████████| 5526/5526 [56:54<00:00,  1.62it/s]  


Starting Zone: AREA OF MINIMAL FLOOD HAZARD


  0%|          | 0/2 [00:00<?, ?it/s]

Starting Spatial Unit: counties


100%|██████████| 2/2 [00:08<00:00,  4.01s/it]
  0%|          | 0/16 [00:00<?, ?it/s]

Starting Spatial Unit: tracts


100%|██████████| 16/16 [00:13<00:00,  1.20it/s]
  0%|          | 0/42 [00:00<?, ?it/s]

Starting Spatial Unit: blkgps


100%|██████████| 42/42 [00:30<00:00,  1.39it/s]
  0%|          | 0/5526 [00:00<?, ?it/s]

Starting Spatial Unit: blocks


100%|██████████| 5526/5526 [1:04:50<00:00,  1.42it/s]


In [19]:
for key in ['counties', 'tracts', 'blkgps', 'blocks']:
    to_save = eastshore_area_df[eastshore_area_df.spatial_unit == key]
    to_save.to_csv('nfhl_eastshore_{sp}.csv'.format(sp = key))