In [1]:
import numpy as np
import geopandas as gpd
import pandas as pd
import matplotlib.pyplot as plt
import os
from tqdm import tqdm

%matplotlib inline

# Later cells resolve their data paths against the current working directory, so it
# must be the nfhl project folder. Set the NFHL_PROJECT_DIR environment variable to
# run the notebook from another checkout; otherwise the original location is used.
os.chdir(os.environ.get('NFHL_PROJECT_DIR', '/Users/chasedawson/dev/uva_equity_center/summer-sandbox/nfhl'))
os.getcwd()

'/Users/chasedawson/dev/uva_equity_center/summer-sandbox/nfhl'

In [8]:
def read_spatial_data(basename, clip_coast=False, path_to_spatial_data="/../spatial_units/data", path_to_water="/data/water/ne_10m_ocean.shp"):
    """
    Reads in spatial data that is in the format 'basename_spatialUnit.shp'.

    One shapefile is read for each of the spatial units 'counties', 'blocks', 'blkgps'
    and 'tracts'. The current working directory is only used to build the file paths;
    it is never changed, so a failed read cannot leave the process in another directory.

    Parameters
    ----------
    basename : str, required
        Base name of shape files. For example, if the files you want to read are of the format
        'cville_counties.shp', 'cville_blocks.shp', etc. then the basename is 'cville'.

    clip_coast : bool, optional (default is False)
        If True, the ocean shapes read from `path_to_water` are removed from the counties
        (overlay with how='difference') and every other spatial unit is intersected with
        the clipped counties.

    path_to_spatial_data : str, optional (default is "/../spatial_units/data")
        Location of the directory holding the shapefiles. It is appended as-is to the
        current working directory, so it should start with a path separator.

    path_to_water : str, optional (default is "/data/water/ne_10m_ocean.shp")
        Location of the ocean shapefile, appended as-is to the current working directory.
        Only read when `clip_coast` is True.

    Output
    ------
    dictionary keyed by spatial unit name ('counties', 'blocks', 'blkgps', 'tracts') whose
    values are the geopandas dataframes read (and, if requested, clipped) for that unit

    """
    print('Reading spatial data for {basename}...'.format(basename = basename))

    # both default paths are relative to the directory the caller is in right now
    base_dir = os.getcwd()

    # read in ocean data used for clipping
    if clip_coast:
        ocean = gpd.read_file(base_dir + path_to_water)

    spatial_dir = base_dir + path_to_spatial_data

    def read_unit(spatial_unit):
        # build the full path instead of chdir-ing into the data directory
        file_name = basename + '_{spatial_unit}.shp'.format(spatial_unit = spatial_unit)
        return gpd.read_file(os.path.join(spatial_dir, file_name))

    data = {}

    if clip_coast:
        # read in counties and clip first; they become the land mask for the other units
        print("Reading counties...")
        counties = read_unit('counties')
        ocean = ocean.to_crs(counties.crs)
        counties = gpd.overlay(counties, ocean, how='difference')
        data['counties'] = counties
        print("Done.")

        # read in rest of shapefiles, clip respective to counties, store as keys in dict
        for spatial_unit in ['blocks', 'blkgps', 'tracts']:
            print("Reading {spatial_unit}...".format(spatial_unit = spatial_unit))
            shp = read_unit(spatial_unit)

            # reproject the clipped counties to the crs of shp, always starting from the
            # stored counties so reprojections are not chained from one unit to the next
            land = counties.to_crs(shp.crs)

            # keep only the parts of shp that fall inside the clipped counties
            shp = gpd.overlay(shp, land[['geometry']], how='intersection', keep_geom_type=True)

            data[spatial_unit] = shp
            print("Done.")

    else:
        for spatial_unit in ['counties', 'blocks', 'blkgps', 'tracts']:
            print("Reading {spatial_unit}...".format(spatial_unit = spatial_unit))
            data[spatial_unit] = read_unit(spatial_unit)
            print("Done.")

    return data

In [9]:
# load every 'eastshore' spatial unit with the ocean clipped out
eastshore_clipped = read_spatial_data(basename="eastshore", clip_coast=True)

Reading spatial data for eastshore...
Reading counties...
Done.
Reading blocks...
Done.
Reading blkgps...
Done.
Reading tracts...
Done.


In [11]:
# save clipped data, one shapefile per spatial unit
clipped_dir = '../spatial_units/data/clipped_coast'

# make sure the target folder exists so the write does not depend on it being
# created by hand beforehand
os.makedirs(clipped_dir, exist_ok=True)

for key, clipped in eastshore_clipped.items():
    clipped.to_file('{out}/eastshore_{sp}_clipped_coast.shp'.format(out = clipped_dir, sp = key))