In [114]:
%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import pandas as pd
import geopandas as gpd
from shapely.geometry import shape, Point
import glob
from fuzzywuzzy import fuzz

In [115]:
def create_query_geo_df(query_file):
    """Load query points from a CSV file into a GeoDataFrame.

    Parameters
    ----------
    query_file : str
        Path to a CSV file that has at least the columns ``STATE``,
        ``LONGITUDE`` and ``LATITUDE``.

    Returns
    -------
    geopandas.GeoDataFrame
        The CSV rows sorted by ``STATE``, with a ``geometry`` column holding
        one ``Point(LONGITUDE, LATITUDE)`` per row and the CRS set to
        EPSG:4326.
    """
    # Sort out of place: no in-place mutation, and the result reads as a
    # single load-then-sort expression.
    query_points = pd.read_csv(query_file).sort_values(by='STATE')

    # Build the points after sorting so they line up with the row order.
    geometry = [
        Point(xy)
        for xy in zip(query_points['LONGITUDE'], query_points['LATITUDE'])
    ]

    # Use the authority string rather than {'init': 'epsg:4326'}: the
    # "init" dictionary form is deprecated in pyproj 2+ / recent geopandas.
    query_df = gpd.GeoDataFrame(query_points, crs='EPSG:4326', geometry=geometry)

    return query_df

In [116]:
def generate_state_zipcode_files(states, file_pattern='../data/State-zip-code-GeoJSON/*.json'):
    """Map each state name to its best-matching zipcode GeoJSON file.

    For every state the name is lower-cased and its spaces replaced with
    underscores, then compared (``fuzz.token_set_ratio``) against the first
    three underscore-separated parts of each candidate file's base name.
    The highest-scoring file wins; on a tie the alphabetically first path
    is chosen.

    Parameters
    ----------
    states : iterable of str
        State names to look up.
    file_pattern : str, optional
        Glob pattern selecting the candidate GeoJSON files. Defaults to the
        location this notebook has always used.

    Returns
    -------
    dict
        ``{state: path_of_closest_matching_file}``. Empty when ``states``
        is empty.

    Raises
    ------
    ValueError
        If ``states`` is non-empty but ``file_pattern`` matches no files.
    """
    import os  # local import: `os` is not imported in the notebook's import cell

    # glob returns files in arbitrary order; sort so ties are always broken
    # the same way from run to run.
    zip_code_files = sorted(glob.glob(file_pattern))

    # Pre-compute the comparable name of every file once, instead of once
    # per (state, file) pair. os.path.basename handles any path separator.
    candidates = [
        (zip_code_file,
         '_'.join(os.path.basename(zip_code_file).split('_')[:3]).lower())
        for zip_code_file in zip_code_files
    ]

    state_zipcode_files = {}
    for state in states:
        if not candidates:
            raise ValueError(
                'No zipcode GeoJSON files match {!r}'.format(file_pattern)
            )
        query_state_name = '_'.join(state.lower().split(' '))
        # max() returns the first maximal element, i.e. the same choice as
        # scoring everything and taking index(max(scores)).
        closest_match, _ = max(
            candidates,
            key=lambda candidate: fuzz.token_set_ratio(query_state_name, candidate[1])
        )
        state_zipcode_files[state] = closest_match
    return state_zipcode_files

In [154]:
def append_zipcode(query_df, zip_geo_file, state):
    """Add a ``ZIPCODE`` column for the query points of one state.

    Reads the zipcode polygons from ``zip_geo_file`` and, for every row of
    ``query_df`` whose ``STATE`` equals ``state``, walks the polygons in file
    order and records the ``GEOID10`` of the first polygon that contains the
    row's ``geometry``. Rows with no containing polygon get ``NaN``.

    Parameters
    ----------
    query_df : DataFrame / GeoDataFrame
        Must have ``STATE`` and ``geometry`` columns. It is not modified.
    zip_geo_file : str
        Path to the zipcode polygon file; it must provide ``GEOID10`` and
        ``geometry`` columns.
    state : str
        Value of ``STATE`` selecting the rows to tag.

    Returns
    -------
    Same type as ``query_df``
        A copy of ``query_df`` with a ``ZIPCODE`` column. Rows of other
        states keep their existing ``ZIPCODE`` value if the column was
        already present, otherwise they are ``NaN``.
    """
    # Positional boolean mask of the rows that belong to `state`.
    state_mask = (query_df['STATE'] == state).values
    points = query_df.loc[state_mask, 'geometry']

    zip_geo_df = gpd.read_file(zip_geo_file)
    # Materialise the (zipcode, polygon) pairs once; re-running iterrows()
    # for every point is needlessly slow.
    zip_polygons = list(zip(zip_geo_df['GEOID10'], zip_geo_df['geometry']))

    zip_codes = []
    for point in points:
        correct_zip = np.nan
        for geoid, polygon in zip_polygons:
            try:
                contains = polygon.contains(point)
            except Exception:
                # Best effort: report the unusable polygon and keep searching.
                print('Invalid polygon')
                continue
            if contains:
                correct_zip = geoid
                break
        zip_codes.append(correct_zip)

    # Work on a copy so the caller's frame (often a slice of a larger frame)
    # is left untouched.
    result = query_df.copy()
    if 'ZIPCODE' in result.columns:
        all_zip_codes = result['ZIPCODE'].tolist()
    else:
        all_zip_codes = [np.nan] * len(result)

    # Write the zipcodes back only to the rows they were computed for.
    # Assigning the per-state list to the whole frame fails with a length
    # mismatch whenever query_df holds rows of other states.
    matching_positions = [pos for pos, hit in enumerate(state_mask) if hit]
    for position, zip_code in zip(matching_positions, zip_codes):
        all_zip_codes[position] = zip_code
    result['ZIPCODE'] = all_zip_codes
    return result

In [118]:
# Build the {state: zipcode GeoJSON path} lookup for every state of interest.
# NOTE(review): `states` is not defined in any visible cell, so this cell
# fails on a fresh kernel (Restart & Run All) -- TODO define it before this call.
state_zip_files = generate_state_zipcode_files(states)

In [132]:
# Zipcode polygon file used for the Kansas run below.
# NOTE(review): hard-coded path directly under ../data/, not inside the
# State-zip-code-GeoJSON directory that generate_state_zipcode_files globs,
# and `state_zip_files` is not consulted here -- confirm which file is intended.
zip_geo_file = '../data/ks_kansas_zip_codes_geo.min.json'

In [147]:
# Load every query point, then keep only the Kansas rows.
query_file = '../data/storm_coordinates.csv'
query_df = create_query_geo_df(query_file)
# .copy() makes the subset an independent frame: a later cell adds a column
# to it, and writing to a boolean-filtered slice triggers pandas'
# SettingWithCopyWarning.
query_df_kansas = query_df[query_df.STATE == 'KANSAS'].copy()

In [156]:
# Tag each Kansas query point with its zipcode, drop the geometry column,
# and write the remaining columns to CSV (without the index).
# NOTE(review): presumably ../output/ must already exist -- nothing here creates it.
query_with_zipcode = append_zipcode(query_df_kansas, zip_geo_file, 'KANSAS')
final = query_with_zipcode.drop(columns=['geometry'])
final.to_csv('../output/KANSAS_events.csv', index=False)

In [None]:
# Visual check: draw the polygon(s) whose GEOID10 equals `correct` and overlay
# a single (lon, lat) point in red.
# NOTE(review): `alabama_df`, `correct`, `lon` and `lat` are not defined in any
# visible cell and this cell has no execution count -- looks like leftover
# scratch code; define its inputs above or remove the cell.
f, ax = plt.subplots(figsize=(7, 10))
alabama_df[alabama_df.GEOID10 == correct].plot(ax=ax)
ax.plot(lon, lat, color='r', marker='o')
ax.set_title('Alabama zip {}'.format(correct))