In [1]:
import pandas as pd
from configparser import ConfigParser
from sodapy import Socrata

In [2]:
# Setup configuration
# The app token is read from a local config.ini ([socrata] section, APP_TOKEN
# option) so that it never has to be written into the notebook itself.
config = ConfigParser()
# ConfigParser.read() skips files it cannot open and returns the list of files
# it actually parsed. Fail here with a clear message instead of letting the
# lookup below raise an unexplained KeyError when the file is missing.
if not config.read('./config.ini'):
    raise FileNotFoundError(
        "Could not read './config.ini'; it must define APP_TOKEN in a [socrata] section"
    )
app_token = config['socrata']['APP_TOKEN']

# Create client to Socrata (NYC Open Data domain, 60 second request timeout)
client = Socrata(domain='data.cityofnewyork.us', app_token=app_token, timeout=60)

# NYC 311 Calls (2010-Present)
dataset = 'erm2-nwe9'

In [3]:
def get_query(dataset_identifier: str, query: str, return_df: bool=False) -> pd.DataFrame | list:
    results = client.get(dataset_identifier=dataset_identifier, query=query)
    if return_df:
        return pd.DataFrame.from_records(results)
    else:
        return results

In [4]:
# Count every record in the dataset whose agency is DSNY
query = """
    SELECT
        COUNT(*)
    WHERE
        agency = "DSNY"
    """
results = get_query(dataset_identifier=dataset, query=query)

In [5]:
# Show the raw response: a one-element list whose dict carries the count as a string
print(results)

[{'COUNT': '3711535'}]


In [6]:
# Get unique DSNY complaint_type
query = (
    """
    SELECT
        DISTINCT complaint_type
    WHERE
        agency = "DSNY"
    """
)
results = get_query(dataset, query)

In [7]:
# Show the distinct DSNY complaint types as a list of one-key dicts
print(results)

[{'complaint_type': 'Oil or Gas Spill'}, {'complaint_type': 'Request Changes - A.S.P.'}, {'complaint_type': 'Obstruction'}, {'complaint_type': 'Street Sweeping Complaint'}, {'complaint_type': 'Electronics Waste'}, {'complaint_type': 'Seasonal Collection'}, {'complaint_type': 'Snow or Ice'}, {'complaint_type': 'Recycling Enforcement'}, {'complaint_type': 'Vacant Lot'}, {'complaint_type': 'Sweeping/Inadequate'}, {'complaint_type': 'Residential Disposal Complaint'}, {'complaint_type': 'Literature Request'}, {'complaint_type': 'Missed Collection (All Materials)'}, {'complaint_type': 'Abandoned Bike'}, {'complaint_type': 'Overflowing Litter Baskets'}, {'complaint_type': 'Commercial Disposal Complaint'}, {'complaint_type': 'Request Large Bulky Item Collection'}, {'complaint_type': 'Graffiti'}, {'complaint_type': 'Sweeping/Missed-Inadequate'}, {'complaint_type': 'Lot Condition'}, {'complaint_type': 'Request Xmas Tree Collection'}, {'complaint_type': 'Incorrect Data'}, {'complaint_type': 'Reta

In [8]:
# Check descriptor of Dead Animal complaints
query = (
    """
    SELECT
        DISTINCT descriptor
    WHERE
        complaint_type = "Dead Animal"
    """
)
results = get_query(dataset, query)

In [9]:
# Show the distinct descriptor values returned for Dead Animal complaints
print(results)

[{'descriptor': 'Other'}, {'descriptor': 'Opossum'}, {'descriptor': 'Dog'}, {'descriptor': 'Cat'}, {'descriptor': 'Bird'}, {'descriptor': 'Squirrel'}, {'descriptor': 'Rat or Mouse'}, {'descriptor': 'Raccoon'}, {'descriptor': 'Deer'}]


In [10]:
# Check number of Dead Animal complaints by descriptor
query = (
    """
    SELECT
        descriptor,
        COUNT(*) as num_dead
    WHERE
        complaint_type = "Dead Animal"
    GROUP BY
        descriptor
    ORDER BY
        num_dead DESC
    """
)
results = get_query(dataset, query)

In [11]:
# Show the per-descriptor counts; the query sorts them by num_dead, largest first
print(results)  # Poor cats!

[{'descriptor': 'Cat', 'num_dead': '5152'}, {'descriptor': 'Raccoon', 'num_dead': '1741'}, {'descriptor': 'Rat or Mouse', 'num_dead': '1458'}, {'descriptor': 'Opossum', 'num_dead': '1048'}, {'descriptor': 'Squirrel', 'num_dead': '827'}, {'descriptor': 'Other', 'num_dead': '737'}, {'descriptor': 'Bird', 'num_dead': '734'}, {'descriptor': 'Dog', 'num_dead': '533'}, {'descriptor': 'Deer', 'num_dead': '295'}]


In [12]:
# Get DataFrame of Dead Animal complaints
query = (
    """
    SELECT
        created_date,
        descriptor,
        incident_zip,
        community_board,
        latitude,
        longitude,
        borough
    WHERE
        complaint_type = "Dead Animal"
        AND incident_zip IS NOT NULL
        AND community_board IS NOT NULL
        AND latitude IS NOT NULL
        AND longitude IS NOT NULL
    LIMIT 20000
    """
)
df = get_query(dataset, query, True)    # Return DataFrame instead of list

In [13]:
# Summarise the fetched frame: row count, null counts, and dtypes.
# In the captured output every column, including the coordinates and
# created_date, has dtype object.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 11999 entries, 0 to 11998
Data columns (total 7 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   created_date     11999 non-null  object
 1   descriptor       11999 non-null  object
 2   incident_zip     11999 non-null  object
 3   community_board  11999 non-null  object
 4   latitude         11999 non-null  object
 5   longitude        11999 non-null  object
 6   borough          11999 non-null  object
dtypes: object(7)
memory usage: 656.3+ KB


In [14]:
# Preview the first rows of the fetched frame
df.head()

Unnamed: 0,created_date,descriptor,incident_zip,community_board,latitude,longitude,borough
0,2023-01-10T23:36:29.000,Raccoon,11420,10 QUEENS,40.666085242007576,-73.82807761990782,QUEENS
1,2023-01-10T21:48:53.000,Cat,11226,17 BROOKLYN,40.643201390191344,-73.94958507875664,BROOKLYN
2,2023-01-10T20:09:25.000,Cat,11230,14 BROOKLYN,40.62719412921759,-73.96534738014583,BROOKLYN
3,2023-01-10T19:32:13.000,Dog,11434,12 QUEENS,40.69262928944387,-73.77512626733017,QUEENS
4,2023-01-10T19:29:50.000,Dog,11434,12 QUEENS,40.69289349715199,-73.77408682532501,QUEENS


In [15]:
# Inspect the last rows of the fetched frame; the final created_date shown is 2021-09-22.
# NOTE(review): reading that as the earliest complaint assumes the rows are sorted newest-first — confirm.
df.tail()   # Appears Dead Animal complaint_type only used since 2021-09-22

Unnamed: 0,created_date,descriptor,incident_zip,community_board,latitude,longitude,borough
11994,2021-09-22T09:02:04.000,Cat,11208,05 BROOKLYN,40.661535923573474,-73.87883537653437,BROOKLYN
11995,2021-09-22T08:40:12.000,Cat,11220,07 BROOKLYN,40.6360756487013,-74.00895344828234,BROOKLYN
11996,2021-09-22T08:15:33.000,Cat,10305,02 STATEN ISLAND,40.597840901906025,-74.07555480884422,STATEN ISLAND
11997,2021-09-22T07:12:33.000,Cat,11221,03 BROOKLYN,40.68780863369472,-73.93666785274185,BROOKLYN
11998,2021-09-22T02:57:31.000,Cat,11416,09 QUEENS,40.68959050607829,-73.84071697793438,QUEENS
