In [1]:
import os
import sys
import pandas as pd
import numpy as np
import datetime
import urllib
import requests
from sodapy import Socrata
import warnings
warnings.filterwarnings('ignore')


import geopandas as gpd
from geopandas.tools import sjoin
from fiona.crs import from_epsg
import shapely
from shapely.geometry import Point, MultiPoint
import io

from APPTOKEN import myToken 
## Store your Socrata app token in a file named APPTOKEN.py, in a variable called myToken,
## or simply define a variable called myToken in this notebook instead of importing it.

### Access Data
- 311 Complaints dataset on NYC Open Data
- NTA (Neighborhood Tabulation Area) boundaries, also on NYC Open Data (downloaded below as GeoJSON)

In [2]:
# Socrata coordinates of the dataset queried below.
domain = 'data.cityofnewyork.us'
data_id = 'fhrw-4uyv'

# Plain REST URL of the same resource; not referenced by the cells visible here.
endpoint =  'https://data.cityofnewyork.us/resource/fhrw-4uyv.json'

# Authenticated client used for every SoQL query in this notebook.
token = myToken
client = Socrata(domain, token)

In [3]:
# Download every complaint created on 2016-11-09, one page at a time.
#
# * `order by :id` gives the rows a stable order, so consecutive offsets can
#   neither skip nor repeat records.
# * Paging stops as soon as a page comes back short, instead of always issuing a
#   fixed number of requests (which would silently truncate larger result sets
#   and makes needless empty requests for smaller ones).
PAGE_SIZE = 50000

query_template = """
    select
        latitude,
        longitude,
        agency_name,
        complaint_type,
        descriptor,
        date_extract_hh(created_date),
        date_trunc_ymd(created_date) as day
    where 
        created_date >= '2016-11-09T00:00:00.000' and created_date < '2016-11-10T00:00:00.000'
    order by
        :id
    limit
        {limit}
    offset
        {offset}
    """

results = []
offset = 0
while True:
    page = client.get(data_id, query=query_template.format(limit=PAGE_SIZE, offset=offset))
    results.extend(page)  # extend in place rather than rebuilding the list for every page
    print("Number of results downloaded: {}".format(len(results)))
    if len(page) < PAGE_SIZE:
        # A short (or empty) page means the result set is exhausted.
        break
    offset += PAGE_SIZE


Number of results downloaded: 5530
Number of results downloaded: 5530
Number of results downloaded: 5530


In [4]:
def CoordstoGDF(dataframe):
    """Turn a table with ``longitude``/``latitude`` columns into a point GeoDataFrame.

    Two columns are added to ``dataframe`` itself (the argument is modified):

    * ``lonlat``   -- ``(longitude, latitude)`` tuples, both cast to float
    * ``geometry`` -- a shapely ``Point`` built from each ``lonlat`` tuple

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Must expose ``longitude`` and ``latitude`` columns whose values can be
        cast to float.

    Returns
    -------
    geopandas.GeoDataFrame
        ``dataframe`` wrapped as a GeoDataFrame using the ``geometry`` column and
        the EPSG:4326 coordinate reference system.
    """
    dataframe['lonlat'] = list(zip(dataframe.longitude.astype(float),
                                   dataframe.latitude.astype(float)))
    # Series.map replaces DataFrame.applymap, which is deprecated since pandas 2.1;
    # the resulting column of Point objects is the same.
    dataframe['geometry'] = dataframe['lonlat'].map(Point)
    # NOTE(review): the {'init': ...} spelling is deprecated by pyproj >= 2 in favour
    # of 'EPSG:4326'; it is left as-is so the notebook still runs on older geopandas.
    crs = {'init':'epsg:4326', 'no_defs': True}
    return gpd.GeoDataFrame(dataframe, crs=crs, geometry=dataframe['geometry'])

In [5]:
# Load the downloaded records into a table and attach a point geometry to each row.
trump311DayAfter = CoordstoGDF(pd.DataFrame(results))

In [6]:
# GeoJSON export of the NTA boundary polygons from NYC Open Data.
NTA = 'https://data.cityofnewyork.us/api/geospatial/d3qk-pfyz?method=export&format=GeoJSON'

# Read the polygons and discard three identifier columns in a single expression.
nta = gpd.read_file(NTA).drop(columns=['ntacode', 'borocode', 'countyfips'])

In [7]:
# Left spatial join: every NTA polygon is kept and repeated once per complaint point
# matched to it, so the result is indexed by the NTA row it came from.
trump311DayAfter = sjoin(left_df=nta, right_df=trump311DayAfter, how='left')

In [8]:
# Columns that are dropped before the aggregations below.
unused_columns = ['boroname', 'index_right', 'day', 'latitude', 'longitude', 'lonlat']
trump311DayAfter = trump311DayAfter.drop(columns=unused_columns)

In [9]:
# Give the extracted hour a short column name and label the index, so that both
# can be used as group-by keys by name.
trump311DayAfter = (
    trump311DayAfter
    .rename(columns={'date_extract_hh_created_date': 'hour'})
    .rename_axis('nta_ind')
)

In [10]:
# Group once by (NTA index, hour); both aggregations below share this grouping.
hourly_groups = trump311DayAfter.groupby(['nta_ind', 'hour'])

# Number of non-null descriptors per (NTA, hour), with the two group keys also
# exposed as ordinary columns.
df = hourly_groups['descriptor'].count().to_frame('hourly_counts')
df['nta'] = df.index.get_level_values(0)
df['hour'] = df.index.get_level_values(1)

# Most frequent complaint type(s) per (NTA, hour); ties come back as an array.
df2 = pd.DataFrame(hourly_groups['complaint_type'].agg(pd.Series.mode))

# Merging on the index values adds a `key_0` column, which is dropped in a later cell.
df = df2.merge(df, on=df.index)

In [11]:
# Total complaints per NTA for the whole day.
#
# The left spatial join keeps one all-null row for an NTA with no complaints, so
# non-null `complaint_type` values are counted instead of rows; such NTAs get 0
# rather than 1. (Assumes every real complaint carries a complaint_type -- TODO confirm.)
#
# A single grouped count aligned on the NTA index replaces the per-NTA loop and its
# chained assignment (`nta['count'][a] = ...`), which pandas does not guarantee to
# write through to `nta` and which is a no-op under Copy-on-Write.
nta['count'] = (
    trump311DayAfter
    .groupby(level=0)['complaint_type']
    .count()
    .reindex(nta.index, fill_value=0)
    .astype(int)
)

In [12]:
import numpy  # no longer needed by this cell; kept so the `numpy` name stays defined for other cells


def _first_mode(values):
    """Return the first modal value of ``values``, or 0 when it has no non-null entries."""
    modes = values.mode()  # sorted; nulls are ignored
    return modes.iloc[0] if len(modes) else 0


# Most common complaint type per NTA over the whole day (first one on ties).
#
# The grouped aggregation runs once instead of being recomputed for every NTA, and
# the result is assigned as a whole column instead of through chained assignment
# (`nta['mode'][a] = ...`), which is not guaranteed to modify `nta`.
# NTAs without any complaint keep the placeholder value 0.
nta['mode'] = (
    trump311DayAfter
    .groupby(level=0)['complaint_type']
    .agg(_first_mode)
    .reindex(nta.index, fill_value=0)
)


In [13]:
# Attach the hourly statistics to the NTA polygons (one row per NTA and hour), then
# discard the join-key columns.
nta = (
    nta
    .merge(df, left_on=nta.index, right_on='nta')
    .drop(columns=['nta', 'key_0'])
)

In [15]:
def simplifyArrays(x):
    """Collapse a NumPy array to its first element; pass anything else through.

    Parameters
    ----------
    x : object
        Any value. Only objects whose type is exactly ``numpy.ndarray`` are unwrapped.

    Returns
    -------
    object
        ``x[0]`` when ``x`` is a ``numpy.ndarray``, otherwise ``x`` unchanged.
    """
    is_plain_array = type(x) is np.ndarray
    return x[0] if is_plain_array else x

In [16]:
# Tied modes were stored as arrays; keep only the first entry of each.
simplified_complaints = nta['complaint_type'].map(simplifyArrays)
nta['complaint_type'] = simplified_complaints


In [18]:
# Write the per-NTA, per-hour table to disk as GeoJSON.
output_path = "trump_Elec_DayAfter.geojson"
nta.to_file(output_path, driver='GeoJSON')
