In [16]:
# 1. Install required packages (run once)
!pip install folium pandas



In [17]:
# 2. Import libraries
import pandas as pd
import folium
from folium.plugins import MarkerCluster, HeatMap

In [18]:
# 3. Read the incident CSV (change the path if the file lives elsewhere)
df = pd.read_csv("sf_crime_data.csv")
print(f"Original data shape: {df.shape}")

# Cleaning pipeline, applied in order:
#   1. discard rows lacking either coordinate,
#   2. parse 'Incident Date' into datetimes,
#   3. (optional) keep only incidents dated 2023-01-01 or later.
df = (
    df.dropna(subset=['Latitude', 'Longitude'])
    .assign(**{'Incident Date': lambda frame: pd.to_datetime(frame['Incident Date'])})
    .loc[lambda frame: frame['Incident Date'] >= '2023-01-01']
)
print(f"Filtered data shape: {df.shape}")

Original data shape: (207624, 37)
Filtered data shape: (47647, 37)


In [19]:
# 4. Sample the dataset for performance
# The request is capped at the number of available rows: DataFrame.sample
# raises ValueError when n exceeds the population and replace=False (the
# default), which would happen if the date filter leaves fewer rows than asked.
SAMPLE_SIZE = 1000  # Reduce if map still lags
df_sampled = df.sample(n=min(SAMPLE_SIZE, len(df)), random_state=42)  # fixed seed -> same sample on every run
df_sampled.head()

Unnamed: 0,Row ID,Incident Datetime,Incident Date,Incident Time,Incident Year,Incident Day of Week,Report Datetime,Incident ID,Incident Number,CAD Number,...,data_as_of,data_loaded_at,Neighborhoods,ESNCAG - Boundary File,Central Market/Tenderloin Boundary Polygon - Updated,Civic Center Harm Reduction Project Boundary,HSOC Zones as of 2018-06-05,Invest In Neighborhoods (IIN) Areas,Current Supervisor Districts,Current Police Districts
15931,145884606244,2025/01/20 08:30:00 AM,2025-01-20,08:30,2025,Monday,2025/01/26 01:44:00 PM,1458846.0,256009985.0,,...,2025/06/12 10:07:02 AM,2025/06/13 09:52:57 AM,39.0,,,,,,7.0,10.0
45371,148625907055,2025/05/27 12:28:00 PM,2025-05-27,12:28,2025,Tuesday,2025/05/27 12:28:00 PM,1486259.0,250296055.0,251471701.0,...,2025/06/12 10:07:02 AM,2025/06/13 09:52:57 AM,83.0,,,,,,2.0,9.0
33708,147522062010,2025/04/10 07:30:00 AM,2025-04-10,07:30,2025,Thursday,2025/04/10 07:30:00 AM,1475220.0,240699522.0,,...,2025/06/12 10:07:02 AM,2025/06/13 09:52:57 AM,32.0,,,,,,10.0,1.0
43340,148438216650,2025/05/19 08:39:00 AM,2025-05-19,08:39,2025,Monday,2025/05/19 08:42:00 AM,1484382.0,250279794.0,251390673.0,...,2025/06/12 10:07:02 AM,2025/06/13 09:52:57 AM,38.0,,,,5.0,,5.0,3.0
42468,148355571000,2025/05/12 04:48:00 PM,2025-05-12,16:48,2025,Monday,2025/05/12 05:53:00 PM,1483555.0,256048529.0,,...,2025/06/12 10:07:02 AM,2025/06/13 09:52:57 AM,19.0,,,,,,3.0,6.0


In [20]:
# 5. Create base map centred on the coordinates used throughout this notebook
sf_map = folium.Map(location=[37.77, -122.42], zoom_start=12)

# Add clustered markers: one marker per sampled incident, grouped by a
# MarkerCluster layer attached to the map.
marker_cluster = MarkerCluster().add_to(sf_map)

# Iterate over plain tuples of just the three columns needed; this avoids
# building a full pandas Series per row as iterrows() does.
marker_fields = df_sampled[['Latitude', 'Longitude', 'Incident Category']]
for lat, lon, category in marker_fields.itertuples(index=False, name=None):
    # A missing category arrives as NaN (a float); give it an explicit label
    # instead of letting it be stringified into the popup.
    label = "Unknown" if pd.isna(category) else str(category)
    folium.Marker(
        location=[lat, lon],
        popup=label
    ).add_to(marker_cluster)

# Save to HTML, then leave the map as the cell's last expression so it renders inline
sf_map.save("sf_crime_clustered_map.html")
sf_map

In [21]:
# 6. (Optional) Density view: a HeatMap layer in place of individual markers
sf_map_heat = folium.Map(zoom_start=12, location=[37.77, -122.42])

# HeatMap is given a list of [lat, lon] pairs built from the sampled frame
heat_data = df_sampled.loc[:, ['Latitude', 'Longitude']].to_numpy().tolist()
sf_map_heat.add_child(HeatMap(heat_data, radius=10))

# Write the standalone HTML file, then display the map as the cell output
sf_map_heat.save("sf_crime_heatmap.html")
sf_map_heat