In [1]:
# data manipulation
import numpy as np
import pandas as pd
import geopandas as gpd
import geodatasets

# visualization
import matplotlib.pyplot as plt
from matplotlib import font_manager
from matplotlib.font_manager import FontProperties
# from highlight_text import fig_text, ax_text
from matplotlib.patches import FancyArrowPatch

import geoplot
import geoplot.crs as gcrs


In [2]:

# Neighbourhood radius for clustering. Dividing the distance in km by the
# Earth's radius (6371 km) expresses it as an angle in radians.
earth_radius_km = 6371.0
epsilon_km = 0.5  # 500 m
epsilon_rad = epsilon_km / earth_radius_km

In [3]:
# Read the raw complaint reports and preview the first rows.
# NOTE(review): the preview shows an 'Unnamed: 0' column, which looks like a
# saved row index - consider `index_col=0` after confirming with the data owner.
reports_path = 'reports.csv'
reports = pd.read_csv(filepath_or_buffer=reports_path)
reports.head(n=5)

Unnamed: 0,민원접수일,민원접수시간,주소,경도,위도,요일
0,2021-09-29,19:29:00,서울특별시 강서구 강서로15길 49,126.843247,37.532089,Weekday
1,2021-09-29,18:48:00,성북구 오패산로19길 34-5,127.033761,37.609537,Weekday
2,2021-09-29,18:47:00,장위로21다길 59-19 주소지 앞도로 외,127.045741,37.616406,Weekday
3,2021-09-29,18:47:00,서울특별시 강북구 오패산로30길 13,127.034685,37.61382,Weekday
4,2021-09-29,18:46:00,서울특별시 강서구 강서로18길 52-5,126.848703,37.534293,Weekday


In [4]:
# Give the columns used by later cells English names; columns that are not
# listed in the mapping keep their original headers.
report_column_names = {
    '민원접수일': 'date',
    '주소': 'address',
    '경도': 'longitude',
    '위도': 'latitude',
}
reports = reports.rename(columns=report_column_names)

In [5]:
# (n_reports, 2) array of [latitude, longitude] rows, plus the same array
# converted from degrees to radians.
lat_lon_columns = ['latitude', 'longitude']
coords = reports.loc[:, lat_lon_columns].to_numpy()
coords_rad = np.radians(coords)

In [6]:
from sklearn.cluster import DBSCAN


---

In [14]:
import pandas as pd
import numpy as np

# Bucket every report into a roughly 1 km x 1 km grid cell.
# Expects `reports` to already carry numeric `latitude` / `longitude` columns.

# Cell size in degrees
lat_grid_size = 0.009  # ~1km in latitude
lon_grid_size = 0.011  # ~1km in longitude (adjust for your area)

# Integer cell indices: floor-divide each coordinate by the cell size
lat_cell = reports['latitude'].floordiv(lat_grid_size).astype(int)
lon_cell = reports['longitude'].floordiv(lon_grid_size).astype(int)
reports['lat_grid'] = lat_cell
reports['lon_grid'] = lon_cell

# Combined key "<lat_grid>_<lon_grid>" identifying the cell
reports['grid_id'] = lat_cell.astype(str) + "_" + lon_cell.astype(str)

In [15]:
from sklearn.cluster import DBSCAN

def cluster_grid(group, eps_km=0.5, min_samples=1):
    """Cluster the reports of a single grid cell with haversine DBSCAN.

    Intended to be passed to ``DataFrame.groupby('grid_id').apply``; it can
    also be called directly on any frame that has the required columns.

    Parameters
    ----------
    group : pandas.DataFrame
        Rows of one grid cell. Must contain ``latitude`` and ``longitude``
        (decimal degrees) and a ``grid_id`` string column.
    eps_km : float, default 0.5
        DBSCAN neighbourhood radius in kilometres (0.5 km = 500 m).
    min_samples : int, default 1
        Passed through to ``DBSCAN``. With values above 1, DBSCAN may label
        rows as noise (``-1``); those rows then share the
        ``"<grid_id>_-1"`` global id.

    Returns
    -------
    pandas.DataFrame
        A copy of ``group`` with two extra columns: ``local_cluster`` (the
        DBSCAN label inside this cell) and ``global_cluster``
        (``"<grid_id>_<local_cluster>"``). The input frame is not modified.
    """
    # Work on a copy: functions handed to groupby.apply must not mutate the
    # group they receive, and a copy also avoids SettingWithCopy warnings.
    clustered = group.copy()

    # The haversine metric expects [latitude, longitude] in radians
    coords_rad = np.radians(clustered[['latitude', 'longitude']].to_numpy())

    # Express the radius as an angle: distance / Earth radius (6371 km)
    eps_rad = eps_km / 6371.0

    db = DBSCAN(eps=eps_rad, min_samples=min_samples, metric='haversine')
    clustered['local_cluster'] = db.fit_predict(coords_rad)

    # Local labels restart at 0 in every cell, so prefix them with the grid id
    # to obtain an identifier that is unique across the whole data set
    clustered['global_cluster'] = (
        clustered['grid_id'] + "_" + clustered['local_cluster'].astype(str)
    )
    return clustered

In [16]:
# Run the clustering separately for every grid cell. `group_keys=False` keeps
# the grid id out of the result's index.
reports_by_cell = reports.groupby('grid_id', group_keys=False)
df_clustered = reports_by_cell.apply(cluster_grid)

  df_clustered = reports.groupby('grid_id', group_keys=False).apply(cluster_grid)


In [17]:
# One row per cluster: mean latitude/longitude of its reports and the number
# of reports it contains.
cluster_groups = df_clustered.groupby('global_cluster')
cluster_stats = cluster_groups.agg({
    'latitude': 'mean',
    'longitude': 'mean',
    'global_cluster': 'count',
})
# The counted column still carries the key's name; rename it before the key
# itself is moved back into a regular column.
cluster_stats = cluster_stats.rename(columns={'global_cluster': 'count'})
result = cluster_stats.reset_index()

In [20]:
# Preview the first rows, now including the grid and cluster columns.
df_clustered.head()

Unnamed: 0,date,민원접수시간,address,longitude,latitude,요일,lat_grid,lon_grid,grid_id,local_cluster,global_cluster
0,2021-09-29,19:29:00,서울특별시 강서구 강서로15길 49,126.843247,37.532089,Weekday,4170,11531,4170_11531,0,4170_11531_0
1,2021-09-29,18:48:00,성북구 오패산로19길 34-5,127.033761,37.609537,Weekday,4178,11548,4178_11548,0,4178_11548_0
2,2021-09-29,18:47:00,장위로21다길 59-19 주소지 앞도로 외,127.045741,37.616406,Weekday,4179,11549,4179_11549,0,4179_11549_0
3,2021-09-29,18:47:00,서울특별시 강북구 오패산로30길 13,127.034685,37.61382,Weekday,4179,11548,4179_11548,0,4179_11548_0
4,2021-09-29,18:46:00,서울특별시 강서구 강서로18길 52-5,126.848703,37.534293,Weekday,4170,11531,4170_11531,0,4170_11531_0


In [None]:
# Read the park locations table.
parks_path = 'parks.csv'
parks = pd.read_csv(filepath_or_buffer=parks_path)



In [10]:
# Give the coordinate and address columns English names; any column not
# listed in the mapping keeps its original header.
park_column_names = {
    '주소': 'address',
    '경도': 'longitude',
    '위도': 'latitude',
}
parks = parks.rename(columns=park_column_names)

In [12]:
# Keep only the coordinates and the address; every other column is dropped.
kept_columns = ['latitude', 'longitude', 'address']
parks = parks.loc[:, kept_columns]

In [16]:
# NOTE(review): a cell only displays its last expression, so the result of
# `parks.head()` is discarded here and just the (rows, columns) shape is shown.
parks.head()
parks.shape

(1463, 3)

In [7]:
def haversine_array(lat, lon, coords, radius=6371000.0):
    """Great-circle (haversine) distance from one point to many points.

    Parameters
    ----------
    lat, lon : float
        Reference point in decimal degrees.
    coords : array-like of shape (n, 2)
        Target points as ``[latitude, longitude]`` rows in decimal degrees.
        Any array-like is accepted (ndarray, list of pairs, DataFrame); a
        single ``[lat, lon]`` pair is treated as one row. Only the first two
        columns are read.
    radius : float, default 6371000.0
        Sphere radius. The result is in the same unit (metres by default).

    Returns
    -------
    numpy.ndarray of shape (n,)
        Distance from ``(lat, lon)`` to every row of ``coords``; an empty
        array when ``coords`` is empty.
    """
    points = np.asarray(coords, dtype=float)
    if points.size == 0:
        # Nothing to measure against; avoid indexing into an empty array
        return np.empty(0, dtype=float)
    points = np.atleast_2d(points)

    lat1, lon1 = np.radians(lat), np.radians(lon)
    lat2, lon2 = np.radians(points[:, 0]), np.radians(points[:, 1])
    dlat, dlon = lat2 - lat1, lon2 - lon1

    a = np.sin(dlat / 2) ** 2 + np.cos(lat1) * np.cos(lat2) * np.sin(dlon / 2) ** 2
    # Floating-point rounding can push `a` marginally outside [0, 1] (e.g. for
    # near-antipodal points); clip so sqrt/arcsin never produce NaN
    a = np.clip(a, 0.0, 1.0)
    return 2 * radius * np.arcsin(np.sqrt(a))

In [14]:
# For every park, count the complaint locations in `coords` that lie within
# the search radius (great-circle distance from `haversine_array`, in metres).
radius_m = 500  # 500 m, the same neighbourhood size used for the clustering

# Walk the two coordinate columns directly instead of using `iterrows()`,
# which builds a full Series per row and does not preserve column dtypes.
complaint_counts = [
    (haversine_array(park_lat, park_lon, coords) <= radius_m).sum()
    for park_lat, park_lon in zip(parks['latitude'], parks['longitude'])
]

# Leave `parks` untouched: the counts go into a copy, under the column
# '민원 수' ("number of complaints").
df = parks.copy()
df['민원 수'] = complaint_counts

In [15]:
# Display the parks table together with the per-park complaint counts.
df

Unnamed: 0,latitude,longitude,address,민원 수
0,37.477263,127.066477,강남구 개포동 126-2,392
1,37.481496,127.048218,강남구 개포동 1266-0,4191
2,37.494938,127.079307,강남구 개포동 13-2,3123
3,37.477888,127.065835,강남구 개포동 567-23,395
4,37.508171,127.026290,강남구 논현동 168-0,29290
...,...,...,...,...
1458,37.570006,127.017298,중구 흥인동 162-1 0,26973
1459,37.598946,127.099071,중랑구 망우동 486-10,4366
1460,37.574183,127.089299,중랑구 면목동 1-4,1725
1461,37.579494,127.081451,중랑구 면목동 168-2,2435


In [17]:
# Persist the per-park complaint counts; index=False leaves the row index
# out of the written file.
output_path = 'parks_complaints.csv'
df.to_csv(output_path, index=False)