In [None]:
from pathlib import Path
import pandas as pd
import numpy as np
import geopandas as gpd

# Every input and output of this notebook lives under ../data/processed.
PROCESSED_DIR = Path("..", "data", "processed")

# Input files.
GRID_GEOJSON_PATH = PROCESSED_DIR.joinpath("seongsu_grid_250m_enriched.geojson")
GRID_STATS_PATH = PROCESSED_DIR.joinpath("seongsu_grid_stats_250m.csv")
PARKS_CSV_PATH = PROCESSED_DIR.joinpath("parks_seongsu.csv")

# Output file.
OUT_PATH = PROCESSED_DIR.joinpath("grid_features_base.csv")


# Column names and parameters, grouped by role:
#   "in"     - column names expected in the input files
#   "out"    - column names to use in the output table
#   "params" - numeric parameters
CFG = {
    "in": dict(
        grid_id="grid_id",    # key shared by the grid geojson and the stats CSV
        cctv_cnt="cctv_cnt",  # CCTV count column in the stats CSV
        # Longitude/latitude column names of the parks CSV.
        # Set these explicitly if auto-detection fails.
        park_lon=None,        # e.g. "lon" / "longitude" / "x"
        park_lat=None,        # e.g. "lat" / "latitude" / "y"
    ),
    "out": dict(
        grid_id="grid_id",            # e.g. "grid"
        cctv_density="cctv_density",  # e.g. "cctv"
        park_yn_50m="park_in_grid",   # e.g. "park"
    ),
    "params": dict(
        park_radius_m=50,   # 50 m
        metric_epsg=5179,   # Korea TM (metre units); 5186/5181 also work
    ),
}

# Report, one line per input, whether the file exists and where it is expected.
print(
    "\n".join(
        f"{label} {path.exists()} {path}"
        for label, path in (
            ("GRID_GEOJSON:", GRID_GEOJSON_PATH),
            ("GRID_STATS  :", GRID_STATS_PATH),
            ("PARKS_CSV   :", PARKS_CSV_PATH),
        )
    )
)

# Load the grid polygons, the per-grid statistics and the parks table.
grid = gpd.read_file(GRID_GEOJSON_PATH)
grid_stats = pd.read_csv(GRID_STATS_PATH)
parks_df = pd.read_csv(PARKS_CSV_PATH)

# Required columns: the grid key in both tables, the CCTV count in the stats.
gid = CFG["in"]["grid_id"]
cctv = CFG["in"]["cctv_cnt"]

if not (gid in grid.columns):
    raise ValueError(f"grid geojson에 '{gid}'가 없어. 현재 컬럼: {list(grid.columns)}")
if not (gid in grid_stats.columns and cctv in grid_stats.columns):
    raise ValueError(f"grid_stats에 '{gid}', '{cctv}'가 없어. 현재 컬럼: {list(grid_stats.columns)}")

# Preview the two columns that were just validated.
grid_stats.loc[:, [gid, cctv]].head()


GRID_GEOJSON: True ..\data\processed\seongsu_grid_250m_enriched.geojson
GRID_STATS  : True ..\data\processed\seongsu_grid_stats_250m.csv
PARKS_CSV   : True ..\data\processed\parks_seongsu.csv


Unnamed: 0,grid_id,cctv_cnt
0,0,0
1,1,0
2,2,0
3,3,0
4,4,0


In [None]:
# Recompute the park flag column of grid_features_base.csv.
#
# Reads `grid`, `parks_df`, `gid`, `CFG` and `OUT_PATH` from the earlier cell,
# flags every grid cell that intersects at least one park point, and rewrites
# OUT_PATH in place with that flag column replaced.

# 1) Resolve the longitude/latitude columns of the parks table.
#    Names set in CFG["in"] win; otherwise common candidates are tried in order.
park_lon = CFG["in"]["park_lon"]
park_lat = CFG["in"]["park_lat"]

# Column names are lower-cased so matching is case-insensitive.
parks_cols = [c.lower() for c in parks_df.columns]
parks_df.columns = parks_cols

# Configured names must be lower-cased too, or they can never match the
# lower-cased columns above.
if park_lon is not None:
    park_lon = park_lon.lower()
if park_lat is not None:
    park_lat = park_lat.lower()

if park_lon is None:
    park_lon = next((c for c in ["lon", "longitude", "x", "lng"] if c in parks_cols), None)
if park_lat is None:
    park_lat = next((c for c in ["lat", "latitude", "y"] if c in parks_cols), None)

if park_lon is None or park_lat is None:
    raise ValueError(f"parks_seongsu.csv에서 위경도 컬럼을 못 찾았어. 현재 컬럼: {list(parks_df.columns)}")

print("✅ parks lon/lat cols:", park_lon, park_lat)

# 2) Parks table -> point GeoDataFrame (coordinates are assumed to be EPSG:4326).
parks_gdf = gpd.GeoDataFrame(
    parks_df.copy(),
    geometry=gpd.points_from_xy(parks_df[park_lon], parks_df[park_lat]),
    crs="EPSG:4326"
)

# 3) If the grid has no CRS, assume EPSG:4326, then bring the parks into the
#    grid's CRS so the spatial join compares like with like.
if grid.crs is None:
    grid = grid.set_crs("EPSG:4326", allow_override=True)

parks_gdf = parks_gdf.to_crs(grid.crs)

# 4) Flag = 1 when at least one park point lies in the cell or on its boundary.
#    The left join keeps every cell; cells without a park get a missing
#    `index_right`.
join = gpd.sjoin(
    grid[[gid, "geometry"]],
    parks_gdf[["geometry"]],
    how="left",
    predicate="intersects"
)

# Vectorised "any match per cell". This replaces groupby(...).apply(lambda ...),
# which emits a pandas deprecation warning because apply operates on the
# grouping column as well.
park_in_grid = (
    join.assign(_has_park=join["index_right"].notna())
        .groupby(gid)["_has_park"]
        .any()
        .astype(int)
        .rename("park_in_grid")
        .reset_index()
)

# 5) Overwrite the flag column in the existing feature table.
base = pd.read_csv(OUT_PATH, dtype={gid: str})
park_col_out = CFG["out"]["park_yn_50m"]

# Merge on string keys so both sides carry the same key dtype.
base[gid] = base[gid].astype(str)
park_in_grid[gid] = park_in_grid[gid].astype(str)

# Drop any previous flag column first so re-running this cell is idempotent.
base2 = base.drop(columns=[park_col_out], errors="ignore").merge(
    park_in_grid.rename(columns={"park_in_grid": park_col_out}),
    on=gid,
    how="left"
)
# Cells absent from the join result are treated as "no park".
base2[park_col_out] = base2[park_col_out].fillna(0).astype(int)

base2.to_csv(OUT_PATH, index=False, encoding="utf-8-sig")
print("✅ updated:", OUT_PATH)
print(f"✅ {park_col_out}=1 grids:", int(base2[park_col_out].sum()), "/", len(base2))

base2[[gid, park_col_out]].head(10)


✅ parks lon/lat cols: lon lat
✅ updated: ..\data\processed\grid_features_base.csv
✅ park_in_grid=1 grids: 2 / 109


  .apply(lambda x: int(x.index_right.notna().any()))


Unnamed: 0,grid_id,park_in_grid
0,0,0
1,1,0
2,2,0
3,3,0
4,4,0
5,5,0
6,6,0
7,7,0
8,8,0
9,9,0


In [None]:
from pathlib import Path
import pandas as pd
import geopandas as gpd

# Every input and output of this cell lives under ../data/processed.
PROCESSED_DIR = Path("..", "data", "processed")

GRID_GEOJSON_PATH = PROCESSED_DIR.joinpath("seongsu_grid_250m_enriched.geojson")
PARKS_CSV_PATH = PROCESSED_DIR.joinpath("parks_seongsu.csv")

# Final feature file that already has the traffic columns merged in (input),
# and the file to write once its park column has been corrected (output).
FINAL_IN = PROCESSED_DIR.joinpath("grid_features_base_plus_traffic3cols_20251201_1214_NEAREST.csv")
FINAL_OUT = PROCESSED_DIR.joinpath("grid_features_plus_traffic_fix_park_in_grid.csv")

# Name of the grid key column.
GID = "grid_id"

# Name of the park flag column to write.
PARK_COL_OUT = "park_in_grid"


# Recompute the park flag and write it into the traffic-merged feature file.
#
# Every table below has its column names lower-cased, so the configured names
# are lower-cased once here and these forms are used for all lookups. The
# original checked the lower-cased columns against the raw GID and hard-coded
# "grid_id" in one place.
GID_L = GID.lower()
PARK_COL_L = PARK_COL_OUT.lower()

# Load the grid; assume EPSG:4326 when the file carries no CRS.
grid = gpd.read_file(GRID_GEOJSON_PATH)
grid.columns = [c.lower() for c in grid.columns]
if grid.crs is None:
    grid = grid.set_crs("EPSG:4326", allow_override=True)

if GID_L not in grid.columns:
    raise ValueError(f"grid에 {GID} 없음. cols={list(grid.columns)}")

# Load the parks CSV and auto-detect its longitude/latitude columns.
parks_df = pd.read_csv(PARKS_CSV_PATH)
parks_df.columns = [c.lower() for c in parks_df.columns]

lon = next((c for c in ["lon", "longitude", "lng", "x"] if c in parks_df.columns), None)
lat = next((c for c in ["lat", "latitude", "y"] if c in parks_df.columns), None)

if lon is None or lat is None:
    raise ValueError(f"parks_seongsu.csv 위경도 컬럼 못 찾음. cols={list(parks_df.columns)}")

# Park points (assumed EPSG:4326) re-projected into the grid's CRS.
parks_gdf = gpd.GeoDataFrame(
    parks_df,
    geometry=gpd.points_from_xy(parks_df[lon], parks_df[lat]),
    crs="EPSG:4326"
).to_crs(grid.crs)

# Left join keeps every cell; `index_right` is missing where no park point
# intersects the cell (boundary included).
join = gpd.sjoin(
    grid[[GID_L, "geometry"]],
    parks_gdf[["geometry"]],
    how="left",
    predicate="intersects"
)

# One row per cell: 1 if any park matched, else 0.
park_flag = (
    join.assign(_has_park=join["index_right"].notna())
        .groupby(GID_L, as_index=False)["_has_park"]
        .any()
        .rename(columns={"_has_park": PARK_COL_L})
)
park_flag[PARK_COL_L] = park_flag[PARK_COL_L].astype(int)

print("✅ park=1 grids:", int(park_flag[PARK_COL_L].sum()), "/", len(park_flag))

# Overwrite only the park column of the final file.
# NOTE(review): the dtype mapping is keyed on GID as spelled in the config; if
# the CSV header uses a different case the key column is not read as str.
final_df = pd.read_csv(FINAL_IN, dtype={GID: str})
final_df.columns = [c.lower() for c in final_df.columns]

if GID_L not in final_df.columns:
    raise ValueError(f"최종파일에 {GID_L} 없음. cols={list(final_df.columns)}")

# Drop any existing park column, then merge the fresh flag on string keys.
final_df = final_df.drop(columns=[PARK_COL_L], errors="ignore")
park_flag[GID_L] = park_flag[GID_L].astype(str)

final_df2 = final_df.merge(park_flag[[GID_L, PARK_COL_L]], on=GID_L, how="left")
# Rows with no matching grid cell are treated as "no park".
final_df2[PARK_COL_L] = final_df2[PARK_COL_L].fillna(0).astype(int)

final_df2.to_csv(FINAL_OUT, index=False, encoding="utf-8-sig")
print("✅ saved:", FINAL_OUT)

final_df2[[GID_L, PARK_COL_L]].head(10)


✅ park=1 grids: 2 / 109
✅ saved: ..\data\processed\grid_features_plus_traffic_fix_park_in_grid.csv


Unnamed: 0,grid_id,park_in_grid
0,0,0
1,1,0
2,2,0
3,3,0
4,4,0
5,5,0
6,6,0
7,7,0
8,8,0
9,9,0


In [None]:
import numpy as np
import pandas as pd

# Only the paths below should need editing.
# They are built relative to the notebook, like the earlier cells, instead of
# an absolute user-specific Windows path, so the cell runs on other machines.
# NOTE(review): assumes these files sit in the same ../data/processed folder
# the earlier cells use - confirm.
from pathlib import Path

PROCESSED_DIR = Path("..") / "data" / "processed"

FINAL_PATH = PROCESSED_DIR / "grid_features_final_seoungsu.csv"
BLD_PATH = PROCESSED_DIR / "seongsu_grid_250m_building_density_only.csv"
# NOTE: this rebinds OUT_PATH, which the first cell set to
# grid_features_base.csv. Re-running the park-flag cell after this one would
# read and overwrite this file instead.
OUT_PATH = PROCESSED_DIR / "final_data_seoungsu.csv"

# Utilities
def rank01_from_series(s: pd.Series) -> np.ndarray:
    """Scale a series to [0, 1] by rank.

    Values are ranked with ties sharing their average rank, then mapped
    linearly so the lowest rank becomes 0.0 and the highest becomes exactly
    1.0.

    Args:
        s: Values to rank; cast to float before ranking.

    Returns:
        A NumPy array of the same length as ``s``. NaN inputs stay NaN and are
        not counted when scaling. With zero or one non-NaN value there is
        nothing to spread, so that value (if any) maps to 0.0.
    """
    ranks = s.astype(float).rank(method="average").to_numpy()
    # Scale by the number of ranked (non-NaN) values. Using len(s) with an
    # epsilon in the denominator kept the maximum just below 1.0 and let NaN
    # entries compress the scale.
    n_valid = int(np.count_nonzero(~np.isnan(ranks)))
    if n_valid <= 1:
        return ranks - 1.0
    return (ranks - 1.0) / (n_valid - 1)

# Build the final feature table: merge the building densities, derive
# night_traffic, commercial_index and existing_lx, then save to OUT_PATH.

# Load
final = pd.read_csv(FINAL_PATH)
bld = pd.read_csv(BLD_PATH)

# Make sure both tables expose the key as "grid_id" (accept a few aliases).
if "grid_id" not in final.columns:
    for c in ["GRID_ID", "gid", "GID", "id"]:
        if c in final.columns:
            final = final.rename(columns={c: "grid_id"})
            break
if "grid_id" not in final.columns:
    raise ValueError(f"final에 grid_id가 없어. columns={list(final.columns)}")

if "grid_id" not in bld.columns:
    for c in ["GRID_ID", "gid", "GID", "id"]:
        if c in bld.columns:
            bld = bld.rename(columns={c: "grid_id"})
            break

# Merge the building densities. A key still missing from `bld` is reported by
# this check as well.
need_bld_cols = ["grid_id", "commercial_density", "residential_density"]
miss = [c for c in need_bld_cols if c not in bld.columns]
if miss:
    raise ValueError(f"빌딩 파일에 필요한 컬럼이 없어: {miss}\n현재 bld cols={list(bld.columns)}")

final = final.merge(bld[need_bld_cols], on="grid_id", how="left")
# Grid cells with no building record get density 0.
density_cols = ["commercial_density", "residential_density"]
final[density_cols] = final[density_cols].fillna(0.0)

# night_traffic: sum of the three night-time traffic columns, rank-scaled to 0~1.
traffic_cols = ["traffic_01_02", "traffic_02_03", "traffic_03_04"]
miss_t = [c for c in traffic_cols if c not in final.columns]
if miss_t:
    raise ValueError(f"final에 traffic 컬럼이 없어: {miss_t}\n현재 cols={list(final.columns)}")

final["night_traffic_sum"] = final[traffic_cols].sum(axis=1).astype(float)
final["night_traffic"] = rank01_from_series(final["night_traffic_sum"])

# commercial_index: weighted blend in which commercial density and night
# traffic raise the score and residential density lowers it.
final["commercial_index"] = (
    0.6 * final["commercial_density"]
    - 0.4 * final["residential_density"]
    + 0.2 * final["night_traffic"]
)

# existing_lx: 25 at or above the 70th percentile of commercial_index,
# 10 at or below the 30th percentile, 15 in between.
q30 = np.quantile(final["commercial_index"], 0.30)
q70 = np.quantile(final["commercial_index"], 0.70)

final["existing_lx"] = np.where(
    final["commercial_index"] >= q70, 25,
    np.where(final["commercial_index"] <= q30, 10, 15)
).astype(int)

print("existing_lx ratio:")
print(final["existing_lx"].value_counts(normalize=True).sort_index().round(3))

# Drop park_within_50m before saving. The original assigned the result to a
# misspelled name (`inal`), so the column was still written to the CSV.
final = final.drop(columns=["park_within_50m"], errors="ignore")

# Save
final.to_csv(OUT_PATH, index=False, encoding="utf-8-sig")
print("✅ saved:", OUT_PATH, "| rows:", len(final), "| cols:", final.shape[1])

display(final[["grid_id","commercial_density","residential_density","night_traffic","commercial_index","existing_lx"]].head(10))


existing_lx ratio:
existing_lx
10    0.394
15    0.046
25    0.560
Name: proportion, dtype: float64
✅ saved: C:\Users\A\OneDrive\바탕 화면\seoul-dimming-system\data\processed\final_data_seoungsu.csv | rows: 109 | cols: 13


Unnamed: 0,grid_id,commercial_density,residential_density,night_traffic,commercial_index,existing_lx
0,0,0.333333,0.25463,0.759259,0.25,25
1,1,0.333333,0.25463,0.759259,0.25,25
2,2,0.333333,0.25463,0.009259,0.1,10
3,3,0.333333,0.25463,0.009259,0.1,10
4,4,0.333333,0.25463,0.759259,0.25,25
5,5,0.333333,0.25463,0.759259,0.25,25
6,6,0.333333,0.25463,0.759259,0.25,25
7,7,0.333333,0.25463,0.759259,0.25,25
8,8,0.333333,0.25463,0.759259,0.25,25
9,9,0.333333,0.925926,0.009259,-0.168519,10
