In [1]:
import os
from pathlib import Path

# Switch to the parent directory so the relative "data/..." paths used by the
# rest of the notebook resolve (presumably the notebook lives one level below
# the project root -- confirm).
# The guard keeps the cell idempotent: an unconditional chdir(parent) climbs one
# more directory every time the cell is re-run. If "data/raw" is already
# reachable from the current directory, we are already where we need to be.
if not (Path.cwd() / "data" / "raw").is_dir():
    os.chdir(Path.cwd().parent)

print("CWD =", Path.cwd())

CWD = C:\Users\A\OneDrive\바탕 화면\seoul-dimming-system


In [None]:
from pathlib import Path


# Raw input files (paths are relative to the current working directory).
SEOUL_GEOJSON = r"data/raw/hangjeongdong_서울특별시.geojson"
LIGHTS_CSV = r"data/raw/서울특별시_가로등 위치 정보_20221108.csv"
CCTV_CSV = r"data/raw/서울시 안심이 CCTV 연계 현황.csv"
PARKS_CSV = r"data/raw/서울시 주요 공원현황.csv"

# Output folders, declared explicitly and created up front.
BOUNDARY_OUT = Path("data/boundary")
CACHE_OUT = Path("data/cache")
PROCESSED = Path("data/processed")

for _out_dir in (BOUNDARY_OUT, CACHE_OUT, PROCESSED):
    _out_dir.mkdir(parents=True, exist_ok=True)

# Derived artefacts shared by the later cells.
SEONGSU_BOUNDARY_PATH = BOUNDARY_OUT.joinpath("seongsu_boundary.geojson")
GRID_250_PATH = CACHE_OUT.joinpath("seongsu_grid_250m.geojson")

In [None]:
# Filter the Seongsu dongs out of the Seoul administrative-dong GeoJSON and save
# their merged outline as the boundary file.

import geopandas as gpd

TARGET_DONGS = ["성수1가1동", "성수1가2동", "성수2가1동", "성수2가3동"]

gdf = gpd.read_file(SEOUL_GEOJSON)

# Attribute column of the GeoJSON that holds the dong name.
name_col = "adm_nm"

# adm_nm values look like "서울특별시 성동구 성수1가1동", so either endswith or
# contains would work; a substring match on the dong names is used here.
mask = gdf[name_col].astype(str).str.contains("|".join(TARGET_DONGS), na=False)
seongsu_parts = gdf[mask].copy()

print("matched rows =", len(seongsu_parts))
print(seongsu_parts[name_col].drop_duplicates().head(20))

# Fail fast when a target dong is absent (renamed dong, different source file).
# Otherwise an incomplete or empty boundary is written silently and the problem
# only shows up later, in the cells that read the boundary file back.
matched_names = seongsu_parts[name_col].astype(str)
missing_dongs = [
    dong
    for dong in TARGET_DONGS
    if not matched_names.str.contains(dong, regex=False).any()
]
if missing_dongs:
    raise ValueError(f"target dongs not found in {SEOUL_GEOJSON}: {missing_dongs}")

# Merge the matched dong polygons into a single geometry.
seongsu_u = gpd.GeoDataFrame(geometry=[seongsu_parts.geometry.union_all()], crs=gdf.crs)

# Save in WGS84 for map / folium use.
seongsu_u = seongsu_u.to_crs("EPSG:4326")
seongsu_u.to_file(SEONGSU_BOUNDARY_PATH, driver="GeoJSON")

print("saved boundary ->", SEONGSU_BOUNDARY_PATH)


matched rows = 4
56    서울특별시 성동구 성수1가1동
57    서울특별시 성동구 성수1가2동
58    서울특별시 성동구 성수2가1동
59    서울특별시 성동구 성수2가3동
Name: adm_nm, dtype: object
saved boundary -> data\boundary\seongsu_boundary.geojson


In [5]:
# Build a 250 m grid over the Seongsu boundary and store it in the cache folder.

import numpy as np
from shapely.geometry import box

meter_crs = "EPSG:5179"   # CRS with metre units
cell = 250                # cell edge length: 250 m

seongsu = gpd.read_file(SEONGSU_BOUNDARY_PATH)
seongsu = seongsu.to_crs(meter_crs)

# Lower-left corners of the candidate cells spanning the boundary's bounding box.
minx, miny, maxx, maxy = seongsu.total_bounds
xs = np.arange(minx, maxx, cell)
ys = np.arange(miny, maxy, cell)

# x is the outer loop; grid_id below is assigned in this same order.
polys = []
for x0 in xs:
    for y0 in ys:
        polys.append(box(x0, y0, x0 + cell, y0 + cell))
grid = gpd.GeoDataFrame({"geometry": polys}, crs=meter_crs)

# Keep only the cells that overlap Seongsu; cells stay full squares (no clipping).
boundary_geom = seongsu.geometry.iloc[0]
grid = grid.loc[grid.intersects(boundary_geom)].copy().reset_index(drop=True)
grid["grid_id"] = np.arange(len(grid))

# Save in WGS84 (for map use).
grid_wgs = grid.to_crs("EPSG:4326")
grid_wgs.to_file(GRID_250_PATH, driver="GeoJSON")

print("saved grid ->", GRID_250_PATH, "cells =", len(grid_wgs))


saved grid -> data\cache\seongsu_grid_250m.geojson cells = 109


In [None]:
# CSV(가로등/CCTV/공원) 공통 로더 + 성수 필터 함수

import pandas as pd
import geopandas as gpd

def read_csv_auto(path, encodings=("utf-8-sig", "utf-8", "cp949", "euc-kr")):
    """Read a CSV file, trying several text encodings until one decodes.

    Parameters
    ----------
    path
        Location of the CSV file, passed straight to ``pd.read_csv``.
    encodings
        Candidate encodings, tried in order. The default covers UTF-8 (with or
        without BOM) and the two common Korean legacy encodings.

    Returns
    -------
    pandas.DataFrame
        The frame produced by the first encoding that decodes the file.

    Raises
    ------
    Whatever ``pd.read_csv`` raises. Only decoding failures trigger a retry
    with the next encoding; any other error (missing file, malformed CSV, ...)
    propagates immediately instead of being swallowed and retried.
    """
    for enc in encodings:
        try:
            return pd.read_csv(path, encoding=enc)
        except UnicodeError:
            # Wrong guess for this file: move on to the next candidate.
            continue
    # No candidate decoded the file. Fall back to pandas' default so the caller
    # sees the underlying decoding error.
    return pd.read_csv(path)

def to_points_gdf(df, lon_col, lat_col, src_crs="EPSG:4326"):
    """Turn a table with longitude/latitude columns into a point GeoDataFrame.

    The input is not modified. The named columns are coerced to numbers and
    stored as ``lon`` / ``lat``; rows where either value is not numeric are
    dropped. The point geometry is built from ``lon`` / ``lat`` and tagged
    with ``src_crs``.
    """
    frame = df.copy()
    for target, source in (("lon", lon_col), ("lat", lat_col)):
        frame[target] = pd.to_numeric(frame[source], errors="coerce")
    frame = frame.dropna(subset=["lon", "lat"]).copy()

    geometry = gpd.points_from_xy(frame["lon"], frame["lat"])
    return gpd.GeoDataFrame(frame, geometry=geometry, crs=src_crs)

def filter_within_boundary(points_gdf, boundary_gdf_m):
    """Keep only the points that lie within the boundary.

    ``boundary_gdf_m`` is expected to arrive in the metre CRS (``meter_crs``).
    The points are reprojected to the boundary's CRS before the spatial join,
    and the join's ``index_right`` helper column is removed from the result.
    """
    reprojected = points_gdf.to_crs(boundary_gdf_m.crs)
    inside = gpd.sjoin(reprojected, boundary_gdf_m, how="inner", predicate="within")
    return inside.drop(columns=["index_right"])


In [7]:
# Load the Seongsu boundary and the grid, both reprojected to the metre CRS.

seongsu_m = gpd.read_file(SEONGSU_BOUNDARY_PATH)
seongsu_m = seongsu_m.to_crs(meter_crs)

# Only the cell id and its geometry are needed downstream.
grid_m = gpd.read_file(GRID_250_PATH).to_crs(meter_crs)
grid_m = grid_m[["grid_id", "geometry"]]


In [8]:
# Streetlight preprocessing: keep only the Seongsu rows and save them.
#    Coordinate columns in the file: 위도 (latitude), 경도 (longitude)

out_lights = PROCESSED / "streetlights_seongsu.csv"

lights_df = read_csv_auto(LIGHTS_CSV)
lights_gdf = to_points_gdf(lights_df, lat_col="위도", lon_col="경도")
lights_s = filter_within_boundary(lights_gdf, seongsu_m)

# The CSV carries attributes only; the geometry column is left out.
lights_s.drop(columns="geometry").to_csv(
    out_lights,
    encoding="utf-8-sig",
    index=False,
)
print("streetlights seongsu rows =", len(lights_s), "->", out_lights)


streetlights seongsu rows = 801 -> data\processed\streetlights_seongsu.csv


In [9]:
# CCTV preprocessing: keep only the Seongsu rows and save them.
#    Coordinate columns in the file: 위도 (latitude), 경도 (longitude)

out_cctv = PROCESSED / "cctv_seongsu.csv"

cctv_df = read_csv_auto(CCTV_CSV)
cctv_gdf = to_points_gdf(cctv_df, lat_col="위도", lon_col="경도")
cctv_s = filter_within_boundary(cctv_gdf, seongsu_m)

# The CSV carries attributes only; the geometry column is left out.
cctv_s.drop(columns="geometry").to_csv(
    out_cctv,
    encoding="utf-8-sig",
    index=False,
)
print("cctv seongsu rows =", len(cctv_s), "->", out_cctv)


cctv seongsu rows = 1073 -> data\processed\cctv_seongsu.csv


In [None]:
# Park preprocessing: keep only the Seongsu rows and save them.
# The parks file has dedicated WGS84 coordinate columns:
# X좌표(WGS84) = longitude, Y좌표(WGS84) = latitude

out_parks = PROCESSED / "parks_seongsu.csv"

parks_df = read_csv_auto(PARKS_CSV)
parks_gdf = to_points_gdf(parks_df, lat_col="Y좌표(WGS84)", lon_col="X좌표(WGS84)")
parks_s = filter_within_boundary(parks_gdf, seongsu_m)

# The CSV carries attributes only; the geometry column is left out.
parks_s.drop(columns="geometry").to_csv(
    out_parks,
    encoding="utf-8-sig",
    index=False,
)
print("parks seongsu rows =", len(parks_s), "->", out_parks)


parks seongsu rows = 2 -> data\processed\parks_seongsu.csv


In [11]:
# 격자별 집계(grid_id 매핑)

def count_by_grid(points_gdf_m, grid_m, col_name):
    """Count the points falling within each grid cell.

    Returns a copy of ``grid_m`` with one extra integer column ``col_name``.
    Cells that contain no point get 0. Points that fall within no cell come
    out of the left join without a ``grid_id`` and are therefore not counted.
    """
    hits = gpd.sjoin(points_gdf_m[["geometry"]], grid_m, how="left", predicate="within")
    tally = hits.groupby("grid_id").size()
    tally = tally.rename(col_name).reset_index()

    merged = grid_m.merge(tally, how="left", on="grid_id")
    merged[col_name] = merged[col_name].fillna(0).astype(int)
    return merged

# Point layers in the metre CRS, ready to be joined against the grid.
lights_m = lights_s.to_crs(meter_crs)
cctv_m   = cctv_s.to_crs(meter_crs)
parks_m  = parks_s.to_crs(meter_crs)

# Attach one count column per layer to the grid, keyed on grid_id.
grid_stats = grid_m.copy()
for _layer_m, _cnt_col in (
    (lights_m, "streetlight_cnt"),
    (cctv_m, "cctv_cnt"),
    (parks_m, "park_cnt"),
):
    _layer_counts = count_by_grid(_layer_m, grid_m, _cnt_col)[["grid_id", _cnt_col]]
    grid_stats = grid_stats.merge(_layer_counts, on="grid_id")

# Tabular export: the per-cell counts without the geometry column.
out_stats_csv = PROCESSED / "seongsu_grid_stats_250m.csv"
grid_stats.drop(columns="geometry").to_csv(
    out_stats_csv,
    encoding="utf-8-sig",
    index=False,
)
print("saved stats ->", out_stats_csv)


saved stats -> data\processed\seongsu_grid_stats_250m.csv


In [None]:
# For map visualisation: save the grid GeoJSON with the per-cell counts attached.

out_grid_geojson = PROCESSED / "seongsu_grid_250m_enriched.geojson"

# Written in WGS84.
grid_stats_wgs = grid_stats.to_crs("EPSG:4326")
grid_stats_wgs.to_file(out_grid_geojson, driver="GeoJSON")
print("saved enriched grid ->", out_grid_geojson)


saved enriched grid -> data\processed\seongsu_grid_250m_enriched.geojson


In [14]:
from pathlib import Path
# Sanity check: is the per-grid stats CSV on disk? The cell displays True/False.
stats_csv_path = Path("data/processed/seongsu_grid_stats_250m.csv")
stats_csv_path.exists()

True

In [15]:
import pandas as pd

# Reload the saved per-grid stats and summarise the three count columns.
stats = pd.read_csv("data/processed/seongsu_grid_stats_250m.csv")
count_cols = ["streetlight_cnt", "cctv_cnt", "park_cnt"]
stats[count_cols].describe()

Unnamed: 0,streetlight_cnt,cctv_cnt,park_cnt
count,109.0,109.0,109.0
mean,7.348624,9.844037,0.018349
std,13.635721,10.924127,0.134829
min,0.0,0.0,0.0
25%,0.0,0.0,0.0
50%,0.0,6.0,0.0
75%,13.0,20.0,0.0
max,84.0,35.0,1.0
