In [1]:
# data manipulation
import numpy as np
import pandas as pd
import geopandas as gpd
import geodatasets

# visualization
import matplotlib.pyplot as plt
from matplotlib import font_manager
from matplotlib.font_manager import FontProperties
# from highlight_text import fig_text, ax_text
from matplotlib.patches import FancyArrowPatch

import geoplot
import geoplot.crs as gcrs


In [2]:

epsilon_km = 0.5  # 500 meters = 0.5 km
epsilon_rad = epsilon_km / 6371.0  # convert to radians

In [25]:
reports_path = 'reports.csv'
reports = pd.read_csv(reports_path)
reports.head()

Unnamed: 0,민원접수일,민원접수시간,주소,경도,위도,요일
0,2021-09-29,19:29:00,서울특별시 강서구 강서로15길 49,126.843247,37.532089,Weekday
1,2021-09-29,18:48:00,성북구 오패산로19길 34-5,127.033761,37.609537,Weekday
2,2021-09-29,18:47:00,장위로21다길 59-19 주소지 앞도로 외,127.045741,37.616406,Weekday
3,2021-09-29,18:47:00,서울특별시 강북구 오패산로30길 13,127.034685,37.61382,Weekday
4,2021-09-29,18:46:00,서울특별시 강서구 강서로18길 52-5,126.848703,37.534293,Weekday


In [4]:
# rename columns
reports.rename(columns={
    '위도': 'latitude',
    '경도': 'longitude',
    '민원접수일': 'date',
    '주소': 'address',
}, inplace=True)

In [26]:
coords = reports[['경도', '위도']].to_numpy()


In [5]:
coords = reports[['latitude', 'longitude']].to_numpy()
coords_rad = np.radians(coords)

In [6]:
from sklearn.cluster import DBSCAN


---

In [14]:
import pandas as pd
import numpy as np

# Load data

# Grid size in degrees (approx ~1km)
lat_grid_size = 0.009  # ~1km in latitude
lon_grid_size = 0.011  # ~1km in longitude (adjust for your area)

# Assign grid cells
reports['lat_grid'] = (reports['latitude'] // lat_grid_size).astype(int)
reports['lon_grid'] = (reports['longitude'] // lon_grid_size).astype(int)

# Create a combined grid key
reports['grid_id'] = reports['lat_grid'].astype(str) + "_" + reports['lon_grid'].astype(str)

In [15]:
from sklearn.cluster import DBSCAN

def cluster_grid(group):
    coords = group[['latitude', 'longitude']].to_numpy()
    coords_rad = np.radians(coords)
    
    epsilon_rad = 0.5 / 6371.0  # 500m radius in radians

    db = DBSCAN(eps=epsilon_rad, min_samples=1, metric='haversine')
    group['local_cluster'] = db.fit_predict(coords_rad)
    
    # Combine grid id and local cluster to get a global cluster id
    group['global_cluster'] = group['grid_id'] + "_" + group['local_cluster'].astype(str)
    return group

In [16]:
df_clustered = reports.groupby('grid_id', group_keys=False).apply(cluster_grid)

  df_clustered = reports.groupby('grid_id', group_keys=False).apply(cluster_grid)


In [17]:
result = df_clustered.groupby('global_cluster').agg({
    'latitude': 'mean',
    'longitude': 'mean',
    'global_cluster': 'count'
}).rename(columns={'global_cluster': 'count'}).reset_index()

In [20]:
df_clustered.head()

Unnamed: 0,date,민원접수시간,address,longitude,latitude,요일,lat_grid,lon_grid,grid_id,local_cluster,global_cluster
0,2021-09-29,19:29:00,서울특별시 강서구 강서로15길 49,126.843247,37.532089,Weekday,4170,11531,4170_11531,0,4170_11531_0
1,2021-09-29,18:48:00,성북구 오패산로19길 34-5,127.033761,37.609537,Weekday,4178,11548,4178_11548,0,4178_11548_0
2,2021-09-29,18:47:00,장위로21다길 59-19 주소지 앞도로 외,127.045741,37.616406,Weekday,4179,11549,4179_11549,0,4179_11549_0
3,2021-09-29,18:47:00,서울특별시 강북구 오패산로30길 13,127.034685,37.61382,Weekday,4179,11548,4179_11548,0,4179_11548_0
4,2021-09-29,18:46:00,서울특별시 강서구 강서로18길 52-5,126.848703,37.534293,Weekday,4170,11531,4170_11531,0,4170_11531_0


In [18]:
parks_path = 'parks.csv'
parks = pd.read_csv(parks_path)
parks.head()

Unnamed: 0,주소,운영구분,총주차면,평일유료,토요일유료,공휴일유료,평일시작,평일종료,토요일시작,토요일종료,공휴일시작,공휴일종료,경도,위도,1시간 요금
0,강남구 개포동 126-2,1,132.0,Y,Y,Y,09:00:00,19:00:00,00:00:00,00:00:00,00:00:00,00:00:00,127.066477,37.477263,2400.0
1,강남구 개포동 1266-0,1,97.0,Y,N,N,00:00:00,00:00:00,00:00:00,00:00:00,00:00:00,00:00:00,127.048218,37.481496,2400.0
2,강남구 개포동 13-2,1,168.0,Y,N,N,00:00:00,00:00:00,00:00:00,00:00:00,00:00:00,00:00:00,127.079307,37.494938,1200.0
3,강남구 개포동 567-23,1,92.0,Y,Y,Y,09:00:00,19:00:00,00:00:00,00:00:00,00:00:00,00:00:00,127.065835,37.477888,2400.0
4,강남구 논현동 168-0,1,192.0,Y,N,N,00:00:00,00:00:00,00:00:00,00:00:00,00:00:00,00:00:00,127.02629,37.508171,3600.0


In [10]:
parks.rename(columns={
    '위도': 'latitude',
    '경도': 'longitude',
    '주소': 'address'
}, inplace=True)

In [19]:
# drop columns except for latitude, longitude, and address
parks = parks[['주소','총주차면','경도','위도']]

In [27]:
parks.head()

Unnamed: 0,주소,총주차면,경도,위도
0,강남구 개포동 126-2,132.0,127.066477,37.477263
1,강남구 개포동 1266-0,97.0,127.048218,37.481496
2,강남구 개포동 13-2,168.0,127.079307,37.494938
3,강남구 개포동 567-23,92.0,127.065835,37.477888
4,강남구 논현동 168-0,192.0,127.02629,37.508171


In [52]:
def haversine_array(lat, lon, coords):
    R = 6371000  # m
    φ1, λ1 = np.radians(lat), np.radians(lon)
    φ2, λ2 = np.radians(coords[:,0]), np.radians(coords[:,1])
    dφ, dλ = φ2 - φ1, λ2 - λ1
    a = np.sin(dφ/2)**2 + np.cos(φ1)*np.cos(φ2)*np.sin(dλ/2)**2
    return 2 * R * np.arcsin(np.sqrt(a))

In [29]:
complaint_counts = []
for _, park in parks.iterrows():
    dists = haversine_array(park['경도'], park['위도'], coords)
    complaint_counts.append((dists <= 500).sum())
df = parks.copy()
df['민원 수'] = complaint_counts

In [30]:
df

Unnamed: 0,주소,총주차면,경도,위도,민원 수
0,강남구 개포동 126-2,132.0,127.066477,37.477263,324
1,강남구 개포동 1266-0,97.0,127.048218,37.481496,5464
2,강남구 개포동 13-2,168.0,127.079307,37.494938,2859
3,강남구 개포동 567-23,92.0,127.065835,37.477888,350
4,강남구 논현동 168-0,192.0,127.026290,37.508171,36734
...,...,...,...,...,...
1458,중구 흥인동 162-1 0,20.0,127.017298,37.570006,29435
1459,중랑구 망우동 486-10,56.0,127.099071,37.598946,5531
1460,중랑구 면목동 1-4,119.0,127.089299,37.574183,3129
1461,중랑구 면목동 168-2,567.0,127.081451,37.579494,3398


In [31]:
df.to_csv('parks_complaints.csv', index=False)

In [32]:
cctv_path = 'cctv.csv'
cctv = pd.read_csv(cctv_path)
cctv.head()

Unnamed: 0,고정형CCTV지번주소,latitude,longitude
0,양천구 목4동 762-10,37.535891,126.870566
1,양천구 목4동 797-8,37.53274,126.867191
2,양천구 신월2동 496,37.524535,126.848224
3,양천구 신월7동 928-1,37.522349,126.833529
4,양천구 목1동 917,37.528982,126.874597


In [42]:
cctv.shape

(4454, 3)

In [56]:
cctv_coords = cctv[['longitude', 'latitude']].to_numpy()
cctv_coords

array([[126.870566  ,  37.535891  ],
       [126.867191  ,  37.53274   ],
       [126.84822368,  37.52453482],
       ...,
       [127.13759309,  37.49835551],
       [126.83830078,  37.54039277],
       [126.8784292 ,  37.50927074]], shape=(4454, 2))

In [57]:
cctv_counts = []
for _, park in parks.iterrows():
    dists = haversine_array(park['경도'], park['위도'], cctv_coords)
    cctv_counts.append((dists <= 500).sum())
parks_cctv = parks.copy()
parks_cctv['CCTV 수'] = cctv_counts

In [58]:
parks_cctv

Unnamed: 0,주소,총주차면,경도,위도,CCTV 수
0,강남구 개포동 126-2,132.0,127.066477,37.477263,0
1,강남구 개포동 1266-0,97.0,127.048218,37.481496,7
2,강남구 개포동 13-2,168.0,127.079307,37.494938,9
3,강남구 개포동 567-23,92.0,127.065835,37.477888,0
4,강남구 논현동 168-0,192.0,127.026290,37.508171,20
...,...,...,...,...,...
1458,중구 흥인동 162-1 0,20.0,127.017298,37.570006,41
1459,중랑구 망우동 486-10,56.0,127.099071,37.598946,7
1460,중랑구 면목동 1-4,119.0,127.089299,37.574183,6
1461,중랑구 면목동 168-2,567.0,127.081451,37.579494,9


In [59]:
# append the '민원 수' column from df to parks_cctv
parks_cctv['민원 수'] = df['민원 수']
parks_cctv.head()

Unnamed: 0,주소,총주차면,경도,위도,CCTV 수,민원 수
0,강남구 개포동 126-2,132.0,127.066477,37.477263,0,324
1,강남구 개포동 1266-0,97.0,127.048218,37.481496,7,5464
2,강남구 개포동 13-2,168.0,127.079307,37.494938,9,2859
3,강남구 개포동 567-23,92.0,127.065835,37.477888,0,350
4,강남구 논현동 168-0,192.0,127.02629,37.508171,20,36734


In [61]:
parks_cctv.to_csv('parks_cctv.csv', index=False)