# In [3]:
import pandas as pd
import numpy as np
from sklearn.cluster import DBSCAN
import folium

# Read the dataset from the CSV file on disk
df = pd.read_csv('CleanedSeattleData.csv')

# Coordinate matrix in (Y, X) column order.
# NOTE(review): assumes Y is latitude and X is longitude, both in degrees --
# the haversine metric below expects (lat, lon) pairs in radians. Confirm
# against the CSV.
coords = df[['Y', 'X']].to_numpy()

# Mean Earth radius in kilometres; dividing a distance in km by it yields the
# same distance expressed in radians on the sphere.
kms_per_radian = 6371.0088
epsilon = 0.5 / kms_per_radian  # neighbourhood radius of 0.5 km, in radians

# Density-based clustering on great-circle distance. Points that do not
# belong to any cluster are labelled -1 by DBSCAN.
db = DBSCAN(
    metric='haversine',
    algorithm='ball_tree',
    min_samples=5,
    eps=epsilon,
)
db.fit(np.radians(coords))
df['cluster'] = db.labels_

# Create a map centered on the mean (Y, X) of all points.
# The mean is converted to a plain Python list so folium receives an ordinary
# [lat, lon] pair.
map_centre = df[['Y', 'X']].mean().values
mymap = folium.Map(location=map_centre.tolist(), zoom_start=10)

# Colors for the clusters; a cluster label picks its color by index.
# These are handed to folium.CircleMarker, which takes CSS colors, so every
# entry must be a valid CSS color name or hex code:
#   * 'lightred' and 'darkpurple' are folium.Icon names, not CSS colors, and
#     are replaced by equivalent hex codes;
#   * 'black' is replaced by 'teal' because the plotting loop draws noise
#     points (label -1) in black, and a cluster must not look like noise.
# The palette length is unchanged, so all other clusters keep their color.
colors = [
    'red', 'blue', 'green', 'purple', 'orange', 'darkred',
    '#ff8e7f',  # light red
    'beige', 'darkblue', 'darkgreen', 'cadetblue',
    '#5b396b',  # dark purple
    'white', 'pink', 'lightblue', 'lightgreen',
    'gray', 'teal', 'lightgray',
]

# Add one circle marker per row to the map.
# The loop walks plain Python lists of the three columns instead of using
# df.iterrows(): iterrows builds a pandas Series for every row, which is very
# slow on large frames, and it upcasts the integer cluster label to float.
for lat, lon, label in zip(df['Y'].tolist(),
                           df['X'].tolist(),
                           df['cluster'].tolist()):
    cluster_index = int(label)  # Ensure the cluster index is an integer
    if cluster_index == -1:
        # DBSCAN marks noise as -1; noise gets a fixed default color
        color = 'black'
    else:
        # Modulo cycles through the palette when there are more clusters
        # than colors
        color = colors[cluster_index % len(colors)]

    folium.CircleMarker(
        location=(lat, lon),  # (Y, X) of the row
        radius=5,
        fill=True,
        color=color,
        fill_color=color,
        fill_opacity=0.7
    ).add_to(mymap)

# Write the map to an HTML file that can be opened in a browser
mymap.save('map.html')


# Notebook output: KeyboardInterrupt