# NYPD-Complaint-Data to DB

* Clean the [NYPD Complaint Data](https://data.cityofnewyork.us/Public-Safety/NYPD-Complaint-Data-Historic/qgea-i56i/about_data) before loading it into a MySQL database.
* The data is downloaded locally as a `.csv` file for cleaning.

In [1]:
import sys
import os

# Append the notebook's parent directory to the module search path.
sys.path.append(f"..{os.path.sep}")

# The complaint export lives in <parent dir>/data/.
data_dir = 'data'
fname = 'NYPD_Complaint_Data_Historic_20241027.csv'
# Build the path from os.pardir directly rather than reading sys.path[-1]
# back; the resulting path is the same, but it no longer depends on what the
# last sys.path entry happens to be.
fpath = os.path.join(os.pardir, data_dir, fname)

# Fail fast, reporting the absolute path that was looked up.
assert os.path.exists(fpath), f'{os.path.abspath(fpath)} does not exist!'

In [2]:
import polars as pl

# Open the complaint CSV lazily: nothing is read until a later `.collect()`.
# NOTE(review): the export uses the literal text "(null)" as a placeholder in
# many string columns and it is kept as an ordinary string here -- confirm
# whether it should be mapped to real nulls before the DB load.
complaint_lf = pl.scan_csv(
    source=fpath,
    try_parse_dates=True,
)

# Show the column names and the dtypes resolved for the scan.
complaint_lf.collect_schema()

Schema([('CMPLNT_NUM', String),
        ('CMPLNT_FR_DT', String),
        ('CMPLNT_FR_TM', String),
        ('CMPLNT_TO_DT', String),
        ('CMPLNT_TO_TM', String),
        ('ADDR_PCT_CD', Int64),
        ('RPT_DT', String),
        ('KY_CD', Int64),
        ('OFNS_DESC', String),
        ('PD_CD', Int64),
        ('PD_DESC', String),
        ('CRM_ATPT_CPTD_CD', String),
        ('LAW_CAT_CD', String),
        ('BORO_NM', String),
        ('LOC_OF_OCCUR_DESC', String),
        ('PREM_TYP_DESC', String),
        ('JURIS_DESC', String),
        ('JURISDICTION_CODE', Int64),
        ('PARKS_NM', String),
        ('HADEVELOPT', String),
        ('HOUSING_PSA', String),
        ('X_COORD_CD', Int64),
        ('Y_COORD_CD', Int64),
        ('SUSP_AGE_GROUP', String),
        ('SUSP_RACE', String),
        ('SUSP_SEX', String),
        ('TRANSIT_DISTRICT', String),
        ('Latitude', Float64),
        ('Longitude', Float64),
        ('Lat_Lon', String),
        ('PATROL_BORO', String),


In [3]:
# Lookup table of distinct coordinate strings plus their parsed components.
#
# The de-duplication is done once, at the frame level, and lat/lon are then
# derived from the `Lat_Lon` value on the same row. Selecting two separate
# `.unique()` expressions side by side would rely on both producing the same
# row order, which `unique()` does not promise.
unique_lat_lon = (
    complaint_lf
    .select('Lat_Lon')
    .unique()
    .with_columns(
        pl.col('Lat_Lon')
        .str.strip_chars('()')                  # drop the surrounding parentheses
        .str.split_exact(',', 1)                # struct with exactly two fields
        .struct.rename_fields(['lat', 'lon'])
        .alias('fields')
    )
    .unnest('fields')                           # -> columns: Lat_Lon, lat, lon
    .with_columns(
        # Remove the blank left around the comma, then parse as a decimal.
        pl.col('lat').str.strip_chars(' ').cast(pl.Decimal),
        pl.col('lon').str.strip_chars(' ').cast(pl.Decimal),
    )
)

In [4]:
# Execute the lazy query and convert the result to pandas, since the
# coordinates are wrapped in a GeoDataFrame next.
unique_lat_lon_df = (
    unique_lat_lon
    .collect()
    .to_pandas()
)

# Preview the first rows.
unique_lat_lon_df.head()

Unnamed: 0,Lat_Lon,lat,lon
0,"(40.66121, -73.831726)",40.66121,-73.831726
1,"(40.601992, -74.131327)",40.601992,-74.131327
2,"(40.62335633, -74.00783862)",40.62335633,-74.00783862
3,"(40.62990794, -74.11000378)",40.62990794,-74.11000378
4,"(40.692722, -73.994325)",40.692722,-73.994325


In [5]:
import geopandas

# One point geometry per distinct coordinate pair (x = longitude, y = latitude).
lat_lon_points = geopandas.points_from_xy(
    unique_lat_lon_df['lon'],
    unique_lat_lon_df['lat'],
)

# Attach the points to the lookup table and tag them as EPSG:4326.
unique_lat_lon_gdf = geopandas.GeoDataFrame(
    unique_lat_lon_df,
    geometry=lat_lon_points,
    crs="EPSG:4326",
)

* [Modified Zip Code Tabulation Areas (MODZCTA)](https://data.cityofnewyork.us/Health/Modified-Zip-Code-Tabulation-Areas-MODZCTA-/pri4-ifjk/about_data) is downloaded locally as a `.geojson` file; its polygons are used to assign a zip code to each complaint location.

In [6]:
# NOTE: `fname` and `fpath` are reused from the CSV step; from here on they
# refer to the MODZCTA GeoJSON, not the complaint CSV.
fname = 'MODZCTA.geojson'
# Use os.pardir rather than sys.path[-1]: the path is the same, but it does
# not break if something else has appended to sys.path in the meantime.
fpath = os.path.join(os.pardir, data_dir, fname)

# Fail fast, reporting the absolute path that was looked up.
assert os.path.exists(fpath), f'{os.path.abspath(fpath)} does not exist!'

# Load the MODZCTA features (geometry plus attributes) into a GeoDataFrame.
geo_df = geopandas.read_file(fpath)

In [7]:
import numpy as np

# Assign a MODZCTA code to every distinct coordinate.
# Start from '' so that points falling inside no polygon keep an empty
# zipcode without needing a separate clean-up pass.
zips = np.full(unique_lat_lon_gdf.shape[0], '', dtype=object)

# Pair each polygon with its own code directly, instead of looking the code
# up by a positional counter (which only works when geo_df has a default
# 0..n-1 index).
for geom, modzcta in zip(geo_df.geometry, geo_df.modzcta):
    inside = unique_lat_lon_gdf.within(geom).to_numpy()  # boolean mask, one entry per point
    # If a point lies in more than one polygon, the last polygon wins.
    zips[inside] = modzcta

unique_lat_lon_gdf['zipcode'] = zips

In [8]:
# Keep only the join key and the derived zipcode, then move the lookup back
# into polars as a lazy frame.
lat_lon_zip_pdf = unique_lat_lon_gdf[['Lat_Lon', 'zipcode']]
lat_lon_zip_lf = pl.from_pandas(lat_lon_zip_pdf).lazy()

# Confirm the lookup's column names and dtypes.
lat_lon_zip_lf.collect_schema()

Schema([('Lat_Lon', String), ('zipcode', String)])

In [9]:
# Enrich every complaint with the zipcode looked up from its `Lat_Lon` string.
# This is an inner join (polars' default, spelled out here).
# NOTE(review): an inner join keeps only complaints whose `Lat_Lon` matches a
# lookup row -- confirm that dropping unmatched rows is intended.
# NOTE(review): `complaint_lf` is rebound to the joined frame, so re-running
# this cell joins a second time -- re-run from the scan cell instead.
complaint_lf = complaint_lf.join(
    other=lat_lon_zip_lf,
    on='Lat_Lon',
    how='inner',
)

# Show the schema of the joined frame.
complaint_lf.collect_schema()

Schema([('CMPLNT_NUM', String),
        ('CMPLNT_FR_DT', String),
        ('CMPLNT_FR_TM', String),
        ('CMPLNT_TO_DT', String),
        ('CMPLNT_TO_TM', String),
        ('ADDR_PCT_CD', Int64),
        ('RPT_DT', String),
        ('KY_CD', Int64),
        ('OFNS_DESC', String),
        ('PD_CD', Int64),
        ('PD_DESC', String),
        ('CRM_ATPT_CPTD_CD', String),
        ('LAW_CAT_CD', String),
        ('BORO_NM', String),
        ('LOC_OF_OCCUR_DESC', String),
        ('PREM_TYP_DESC', String),
        ('JURIS_DESC', String),
        ('JURISDICTION_CODE', Int64),
        ('PARKS_NM', String),
        ('HADEVELOPT', String),
        ('HOUSING_PSA', String),
        ('X_COORD_CD', Int64),
        ('Y_COORD_CD', Int64),
        ('SUSP_AGE_GROUP', String),
        ('SUSP_RACE', String),
        ('SUSP_SEX', String),
        ('TRANSIT_DISTRICT', String),
        ('Latitude', Float64),
        ('Longitude', Float64),
        ('Lat_Lon', String),
        ('PATROL_BORO', String),


In [11]:
# Execute the whole lazy pipeline (CSV scan + zipcode join) and display the
# resulting DataFrame; `zipcode` is the last column.
# NOTE(review): this materialises every row just to show a preview -- consider
# `complaint_lf.head().collect()` if only a quick look is needed.
complaint_lf.collect()

CMPLNT_NUM,CMPLNT_FR_DT,CMPLNT_FR_TM,CMPLNT_TO_DT,CMPLNT_TO_TM,ADDR_PCT_CD,RPT_DT,KY_CD,OFNS_DESC,PD_CD,PD_DESC,CRM_ATPT_CPTD_CD,LAW_CAT_CD,BORO_NM,LOC_OF_OCCUR_DESC,PREM_TYP_DESC,JURIS_DESC,JURISDICTION_CODE,PARKS_NM,HADEVELOPT,HOUSING_PSA,X_COORD_CD,Y_COORD_CD,SUSP_AGE_GROUP,SUSP_RACE,SUSP_SEX,TRANSIT_DISTRICT,Latitude,Longitude,Lat_Lon,PATROL_BORO,STATION_NAME,VIC_AGE_GROUP,VIC_RACE,VIC_SEX,zipcode
str,str,time,str,str,i64,str,i64,str,i64,str,str,str,str,str,str,str,i64,str,str,str,i64,i64,str,str,str,str,f64,f64,str,str,str,str,str,str,str
"""25636218""","""12/05/2006""",11:38:00,,"""(null)""",13,"""12/05/2006""",105,"""ROBBERY""",361,"""ROBBERY,BANK""","""COMPLETED""","""FELONY""","""MANHATTAN""","""INSIDE""","""BANK""","""N.Y. POLICE DEPT""",0,"""(null)""","""(null)""","""(null)""",987174,209509,"""(null)""","""WHITE""","""M""",,40.741731,-73.989447,"""(40.741731, -73.989447)""","""PATROL BORO MAN SOUTH""","""(null)""","""(null)""","""UNKNOWN""","""D""","""10010"""
"""23364313""","""07/12/2006""",14:16:00,,"""(null)""",79,"""07/12/2006""",114,"""ARSON""",263,"""ARSON 2,3,4""","""COMPLETED""","""FELONY""","""BROOKLYN""","""INSIDE""","""RESIDENCE-HOUSE""","""OTHER""",97,"""(null)""","""(null)""","""(null)""",1000666,190518,"""(null)""","""(null)""","""(null)""",,40.689592,-73.940805,"""(40.689592, -73.940805)""","""PATROL BORO BKLYN NORTH""","""(null)""","""45-64""","""BLACK""","""M""","""11221"""
"""25523851""","""11/26/2006""",16:00:00,,"""(null)""",77,"""11/26/2006""",114,"""ARSON""",263,"""ARSON 2,3,4""","""COMPLETED""","""FELONY""","""BROOKLYN""","""INSIDE""","""RESIDENCE-HOUSE""","""OTHER""",97,"""(null)""","""(null)""","""(null)""",1003175,185813,"""(null)""","""(null)""","""(null)""",,40.676671,-73.931768,"""(40.676671, -73.931768)""","""PATROL BORO BKLYN NORTH""","""(null)""","""18-24""","""BLACK""","""F""","""11213"""
"""25435616""","""11/21/2006""",02:00:00,,"""(null)""",45,"""11/21/2006""",105,"""ROBBERY""",389,"""ROBBERY,DWELLING""","""ATTEMPTED""","""FELONY""","""BRONX""","""(null)""","""STREET""","""N.Y. POLICE DEPT""",0,"""(null)""","""(null)""","""(null)""",1031993,249364,"""(null)""","""BLACK""","""M""",,40.850992,-73.827426,"""(40.85099243, -73.82742619)""","""PATROL BORO BRONX""","""(null)""","""65+""","""WHITE""","""M""","""10461"""
"""34143267""","""09/01/2007""",09:45:00,,"""(null)""",1,"""09/01/2007""",105,"""ROBBERY""",361,"""ROBBERY,BANK""","""COMPLETED""","""FELONY""","""MANHATTAN""","""INSIDE""","""BANK""","""N.Y. POLICE DEPT""",0,"""(null)""","""(null)""","""(null)""",981067,197085,"""(null)""","""BLACK""","""M""",,40.707632,-74.011479,"""(40.707632, -74.011479)""","""PATROL BORO MAN SOUTH""","""(null)""","""(null)""","""UNKNOWN""","""D""","""10005"""
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
"""272431385""","""08/01/2023""",03:45:00,"""08/01/2023""","""03:50:00""",105,"""08/04/2023""",341,"""PETIT LARCENY""",321,"""LARCENY,PETIT FROM AUTO""","""COMPLETED""","""MISDEMEANOR""","""QUEENS""","""FRONT OF""","""STREET""","""N.Y. POLICE DEPT""",0,"""(null)""","""(null)""","""(null)""",1059754,193550,"""UNKNOWN""","""UNKNOWN""","""U""",,40.697605,-73.727707,"""(40.697605, -73.727707)""","""PATROL BORO QUEENS SOUTH""","""(null)""","""65+""","""BLACK""","""M""","""11411"""
"""273033152""","""08/16/2023""",21:00:00,"""08/16/2023""","""21:05:00""",105,"""08/17/2023""",344,"""ASSAULT 3 & RELATED OFFENSES""",101,"""ASSAULT 3""","""COMPLETED""","""MISDEMEANOR""","""QUEENS""","""(null)""","""PARK/PLAYGROUND""","""N.Y. POLICE DEPT""",0,"""(null)""","""(null)""","""(null)""",1057395,201036,"""25-44""","""BLACK""","""F""",,40.718172,-73.736134,"""(40.71817168981014, -73.736133…","""PATROL BORO QUEENS SOUTH""","""(null)""","""25-44""","""BLACK""","""F""","""11428"""
"""270041815""","""06/19/2023""",01:50:00,"""06/19/2023""","""04:30:00""",105,"""06/19/2023""",107,"""BURGLARY""",223,"""BURGLARY,RESIDENCE,NIGHT""","""COMPLETED""","""FELONY""","""QUEENS""","""INSIDE""","""RESIDENCE-HOUSE""","""N.Y. POLICE DEPT""",0,"""(null)""","""(null)""","""(null)""",1058712,181232,"""UNKNOWN""","""UNKNOWN""","""U""",,40.663804,-73.731601,"""(40.663804, -73.731601)""","""PATROL BORO QUEENS SOUTH""","""(null)""","""45-64""","""ASIAN / PACIFIC ISLANDER""","""M""","""11422"""
"""273151416""","""08/19/2023""",15:37:00,"""08/19/2023""","""15:52:00""",113,"""08/19/2023""",126,"""MISCELLANEOUS PENAL LAW""",117,"""RECKLESS ENDANGERMENT 1""","""COMPLETED""","""FELONY""","""QUEENS""","""FRONT OF""","""STREET""","""N.Y. POLICE DEPT""",0,"""(null)""","""(null)""","""(null)""",1053398,193555,"""25-44""","""BLACK""","""F""",,40.697672,-73.750627,"""(40.697672, -73.750627)""","""PATROL BORO QUEENS SOUTH""","""(null)""","""UNKNOWN""","""UNKNOWN""","""M""","""11412"""
