In [52]:
import pandas as pd
import geopandas as gpd

In [53]:
# Inclusive range of years to process: 2001 through 2024
years = [*range(2001, 2025)]

# Input locations: the census tract shapefile and the crime records CSV
crime_csv = 'crime_data/crime_chicago_2001_2024.csv'
census_tract_shp = 'chicago_census_tract/geo_export_2d3ffe6a-dd57-46d5-86e3-4f42a4df0e73.shp'

In [54]:
# Load the census tract shapefile, reproject it to EPSG:4326, and keep only
# the geometry and the `geoid10` column
tracts_gdf = gpd.read_file(census_tract_shp).to_crs(epsg=4326)[['geometry', 'geoid10']]

# Load the crime point records (note: this is a very large dataset)
crime_df = pd.read_csv(crime_csv)

In [55]:
# Define function to aggregate crime point counts by census tract, by type of crime
def aggregate_points(points_gdf, geometry_gdf, tract_col='geoid10', type_col='Primary Type'):
    """Count points per polygon and per category.

    Spatially joins ``points_gdf`` to ``geometry_gdf`` (inner join with the
    ``within`` predicate) and counts the joined rows for every combination of
    ``tract_col`` and ``type_col``.

    Parameters
    ----------
    points_gdf : geopandas.GeoDataFrame
        Point records; must contain the column named by ``type_col``.
    geometry_gdf : geopandas.GeoDataFrame
        Polygons; must contain the column named by ``tract_col``.
    tract_col : str, default 'geoid10'
        Column of ``geometry_gdf`` identifying each polygon.
    type_col : str, default 'Primary Type'
        Column of ``points_gdf`` holding the category to count by.

    Returns
    -------
    pandas.DataFrame
        One row per (``tract_col``, ``type_col``) combination that occurs in
        the join, with the number of matching points in a ``count`` column.
        Points that match no polygon are dropped by the inner join.
    """
    # Carry only the columns the aggregation needs through the join: this keeps
    # the joined frame small and avoids name clashes with unrelated columns.
    points = points_gdf[[type_col, points_gdf.geometry.name]]
    polygons = geometry_gdf[[tract_col, geometry_gdf.geometry.name]]

    joined = gpd.sjoin(points, polygons, how='inner', predicate='within')
    return joined.groupby([tract_col, type_col]).size().reset_index(name='count')
    

In [56]:
# Collect one aggregated frame per year and concatenate them once at the end.
# Growing a DataFrame with pd.concat inside the loop re-copies every earlier
# row on each iteration and starts from an empty frame.
yearly_results = []

# Loop through each year
for year in years:
    # Filter the crime data for the current year. Rows without coordinates are
    # dropped explicitly, since they cannot be placed within any tract; dropna
    # also returns a new frame rather than a slice of crime_df.
    crime_df_year = crime_df[crime_df['Year'] == year].dropna(subset=['Longitude', 'Latitude'])

    # Convert the filtered crime data to a GeoDataFrame of points in EPSG:4326
    crime_gdf_year = gpd.GeoDataFrame(
        crime_df_year,
        geometry=gpd.points_from_xy(crime_df_year.Longitude, crime_df_year.Latitude),
        crs='EPSG:4326',
    )

    # Aggregate the crime points by census tract and crime type
    aggregated_points = aggregate_points(crime_gdf_year, tracts_gdf)

    # Tag the counts with the year they belong to
    aggregated_points['Year'] = year

    yearly_results.append(aggregated_points)

# Single concatenation; fall back to an empty frame if no year was processed
combined_results = (
    pd.concat(yearly_results, ignore_index=True) if yearly_results else pd.DataFrame()
)

In [57]:
# Save the combined results to a CSV file
combined_results.to_csv('crime_data/crime_chicago_2001_2024_by_tract_type.csv', index=False)

# Display 5 randomly chosen rows as a spot check. The fixed random_state makes
# the preview reproducible across runs, and capping n at the frame length
# keeps the cell from raising when there are only a few rows.
combined_results.sample(n=min(5, len(combined_results)), random_state=0)

Unnamed: 0,geoid10,Primary Type,count,Year
114562,17031620100,PUBLIC PEACE VIOLATION,3,2008
150305,17031150402,ASSAULT,4,2011
161846,17031020500,BURGLARY,28,2012
147169,17031842200,SEX OFFENSE,1,2010
167772,17031301100,WEAPONS VIOLATION,6,2012
