In [None]:
from pathlib import Path
import pandas as pd
import numpy as np
import geopandas as gpd

# Import the DBF reader; if dbfread is not installed, try to install it automatically.
# NOTE(review): the fallback runs an unpinned `pip install` as a side effect of this cell.
try:
    from dbfread import DBF as DBFReader
except ImportError:
    import sys, subprocess
    # Invoke pip through the interpreter that is running this kernel, then retry the import.
    subprocess.check_call([sys.executable, "-m", "pip", "install", "dbfread"])
    from dbfread import DBF as DBFReader


# Project layout, resolved relative to the notebook's working directory.
PROJECT_ROOT = Path("..")
RAW_DIR = PROJECT_ROOT.joinpath("data", "raw")
PROCESSED_DIR = PROJECT_ROOT.joinpath("data", "processed")

# Create both data directories up front (no-op when they already exist).
for _data_dir in (RAW_DIR, PROCESSED_DIR):
    _data_dir.mkdir(parents=True, exist_ok=True)

# Input: the enriched 250 m grid.
GRID_PATH = PROCESSED_DIR.joinpath("seongsu_grid_250m_enriched.geojson")

# Input: VWorld building layer AL_D164 (shapefile plus its sibling .dbf attribute table).
SHP = RAW_DIR.joinpath("vworld_building", "AL_D164_11_20250715", "AL_D164_11_20250715.shp")
DBF_PATH = SHP.with_suffix(".dbf")

# Output path announced by this cell.
# NOTE(review): the save cell visible later in this notebook writes to a differently
# named file (OUT_BUILDING) -- confirm whether OUT_CSV is still needed.
OUT_CSV = PROCESSED_DIR.joinpath("seongsu_grid_250m_building_density.csv")

# Echo the resolved paths so a wrong working directory is obvious immediately.
for _label, _path in (("GRID_PATH:", GRID_PATH), ("SHP:", SHP), ("OUT:", OUT_CSV)):
    print(_label, _path)


GRID_PATH: ..\data\processed\seongsu_grid_250m_enriched.geojson
SHP: ..\data\raw\vworld_building\AL_D164_11_20250715\AL_D164_11_20250715.shp
OUT: ..\data\processed\seongsu_grid_250m_building_density.csv


In [None]:
# Load the grid and assemble `bld_fixed` (shapefile geometry + DBF attributes).

# Fail fast when a required input file is missing.
if not GRID_PATH.exists():
    raise FileNotFoundError(f"GRID_PATH가 없어: {GRID_PATH}")
if not SHP.exists():
    raise FileNotFoundError(f"SHP가 없어: {SHP}")

# The attribute table may ship with an upper-case ".DBF" extension; fall back to it.
if not DBF_PATH.exists():
    upper_case_matches = list(SHP.parent.glob(SHP.stem + ".DBF"))
    if not upper_case_matches:
        raise FileNotFoundError(f"DBF가 없어: {DBF_PATH}")
    DBF_PATH = upper_case_matches[0]

grid = gpd.read_file(GRID_PATH)

# Geometry comes from the .shp; attributes are read separately from the .dbf,
# decoded as cp949.
bld_geom = gpd.read_file(SHP)
dbf_records = DBFReader(str(DBF_PATH), encoding="cp949", ignore_missing_memofile=True)
attrs = pd.DataFrame(iter(dbf_records))

# The two tables are glued together by row position, so their lengths must match.
if len(attrs) != len(bld_geom):
    raise ValueError(f"SHP/DBF 행 수 불일치: shp={len(bld_geom)}, dbf={len(attrs)}")

attrs_with_geometry = pd.concat([attrs, bld_geom[["geometry"]]], axis=1)
bld_fixed = gpd.GeoDataFrame(
    attrs_with_geometry,
    geometry="geometry",
    crs=bld_geom.crs
)

# Per the column definition sheet: A29 = building use code, A30 = building use name.
for col in ["A29", "A30"]:
    if col in bld_fixed.columns:
        continue
    raise ValueError(f"{col} 컬럼이 없어. 다른 레이어를 받은 걸 수도 있음. 현재 cols={list(bld_fixed.columns)[:10]}...")

# Quick sanity summary of what was loaded.
print("grid rows:", len(grid), "| crs:", grid.crs)
print("bld rows:", len(bld_fixed), "| crs:", bld_fixed.crs)
print("A30 sample:", bld_fixed["A30"].dropna().astype(str).head(10).tolist())


grid rows: 109 | crs: EPSG:4326
bld rows: 110475 | crs: EPSG:5186
A30 sample: ['업무시설', '제2종근린생활시설', '제2종근린생활시설', '제1종근린생활시설', '공동주택', '업무시설', '업무시설', '업무시설', '공동주택', '공동주택']


In [None]:
# building density only (save)
import numpy as np
import pandas as pd
import geopandas as gpd

# Find the grid identifier column: the first candidate name present in `grid`.
GRID_ID_COL = next(
    (name for name in ["grid_id", "GRID_ID", "gid", "GID", "id"] if name in grid.columns),
    None,
)
if GRID_ID_COL is None:
    raise ValueError(f"grid_id 컬럼 못 찾음. grid columns={list(grid.columns)}")

# Bring the buildings into the grid's CRS (only reproject when they differ).
needs_reproject = bld_fixed.crs != grid.crs
bld = bld_fixed.to_crs(grid.crs) if needs_reproject else bld_fixed

# Clip the buildings to the area of interest (the union of all grid cells).
aoi = grid.union_all()
has_geometry = bld.geometry.notnull()
bld = bld[has_geometry].copy()
# Zero-width buffer applied to every geometry before clipping.
bld["geometry"] = bld.geometry.buffer(0)
bld = gpd.clip(bld, aoi)
# Drop rows left without a geometry after the clip.
bld = bld[bld.geometry.notnull()].copy()
print("clipped buildings:", len(bld))

# Use-group classification (A30 = building use name).
USE_COL = "A30"

# Substrings that mark a use name as residential; checked before the commercial list.
_RESIDENTIAL_KEYWORDS = (
    "단독주택", "공동주택", "아파트", "연립", "다세대", "다가구", "기숙사", "주택", "오피스텔",
)

# Substrings that mark a use name as commercial / office / neighbourhood facility.
_COMMERCIAL_KEYWORDS = (
    "근린생활", "판매시설", "업무시설", "숙박시설", "위락시설", "운수시설",
    "의료시설", "교육연구시설", "노유자시설", "문화및집회시설",
)


def map_use_name_to_group(x):
    """Map a building use name to a coarse use group.

    Parameters
    ----------
    x : object
        A single value from the use-name column. It is converted with ``str()``
        and stripped before matching.

    Returns
    -------
    str
        ``"unknown"`` for missing values (``None``, NaN, ``pd.NA``, ``NaT``),
        blank strings and the literal text ``"nan"`` (case-insensitive);
        ``"residential"`` when the name contains a residential keyword;
        ``"commercial"`` when it contains a commercial keyword;
        ``"etc"`` for everything else (e.g. factories, warehouses).
    """
    # pd.isna covers None, float NaN, pd.NA and NaT; the is_scalar guard keeps
    # it from returning an array for list-like input.
    if x is None or (pd.api.types.is_scalar(x) and pd.isna(x)):
        return "unknown"

    s = str(x).strip()
    if s == "" or s.lower() == "nan":
        return "unknown"

    # Residential is tested first, so a name matching both lists is residential.
    if any(k in s for k in _RESIDENTIAL_KEYWORDS):
        return "residential"

    if any(k in s for k in _COMMERCIAL_KEYWORDS):
        return "commercial"

    # Anything unmatched (factories, warehouses, ...) is lumped together.
    return "etc"

# Classify every clipped building into a coarse use group.
bld["use_group"] = bld[USE_COL].map(map_use_name_to_group)
print("use_group counts:\n", bld["use_group"].value_counts().head(10))

# 4) centroid -> grid join (compute centroids in a projected CRS to avoid the
#    geographic-CRS centroid warning, then go back to the grid's CRS)
bld_tmp = bld.to_crs("EPSG:5179")
bld_pts = bld_tmp.copy()
bld_pts["geometry"] = bld_tmp.geometry.centroid
bld_pts = bld_pts.to_crs(grid.crs)

# Each building is assigned to the grid cell that contains its centroid.
joined = gpd.sjoin(
    bld_pts[["use_group","geometry"]],
    grid[[GRID_ID_COL,"geometry"]],
    how="inner",
    predicate="within"
)

# One row per grid cell, one column per use group, values = building counts.
cnt = joined.groupby([GRID_ID_COL, "use_group"]).size().unstack(fill_value=0).reset_index()

# 5) cell area (km^2) + density (buildings per km^2)
grid_area = grid[[GRID_ID_COL, "geometry"]].copy()
grid_area_m = grid_area.to_crs("EPSG:5179")
grid_area["area_km2"] = grid_area_m.geometry.area / 1_000_000

out = grid_area[[GRID_ID_COL, "area_km2"]].merge(cnt, on=GRID_ID_COL, how="left")

# Cells without any building come out of the left merge as NaN. Fill ONLY the
# count columns and restore an integer dtype: a blanket fillna(0) would leave the
# counts as floats and would also silently zero-fill a missing area.
use_group_cols = [c for c in out.columns if c not in (GRID_ID_COL, "area_km2")]
for group_col in use_group_cols:
    out[group_col] = out[group_col].fillna(0).astype("int64")

# A use group that never occurred has no column at all; treat it as zero everywhere.
out["commercial_cnt"] = out["commercial"] if "commercial" in out.columns else 0
out["residential_cnt"] = out["residential"] if "residential" in out.columns else 0

out["commercial_density_raw"] = out["commercial_cnt"] / out["area_km2"]
out["residential_density_raw"] = out["residential_cnt"] / out["area_km2"]

# 6) rank-based normalization to the 0-1 range (rank01)
def rank01(s: pd.Series):
    """Scale the values of ``s`` to [0, 1] by their rank.

    Values are ranked with ``method="average"``, so tied values share the mean
    of their ranks; a group of tied minima therefore maps to a value above 0.
    The smallest untied value maps to 0.0 and the largest to exactly 1.0.

    Parameters
    ----------
    s : pd.Series
        Numeric values to normalize. NaN entries receive no rank.

    Returns
    -------
    numpy.ndarray
        Float array aligned positionally with ``s``. NaN inputs stay NaN and do
        not count towards the scale. With fewer than two non-NaN values there
        is nothing to spread, so every non-NaN entry maps to 0.0.
    """
    r = s.rank(method="average").to_numpy(dtype=float)
    missing = np.isnan(r)
    n_valid = int(r.size - np.count_nonzero(missing))
    if n_valid <= 1:
        # Avoid dividing by zero: a lone value (or none at all) maps to 0.0.
        return np.where(missing, np.nan, 0.0)
    # Ranks run from 1 to n_valid, so this lands exactly on [0, 1].
    return (r - 1) / (n_valid - 1)

# Rank-normalize both raw densities to the 0-1 range.
commercial_rank = rank01(out["commercial_density_raw"])
residential_rank = rank01(out["residential_density_raw"])
out["commercial_density"] = commercial_rank
out["residential_density"] = residential_rank

# 7) unify the id column name + save
out = out.rename(columns={GRID_ID_COL: "grid_id"})

# Columns written to disk, in output order.
SAVE_COLS = [
    "grid_id",
    "commercial_cnt", "residential_cnt",
    "commercial_density_raw", "residential_density_raw",
    "commercial_density", "residential_density"
]

# NOTE(review): this is a different file name from the OUT_CSV path printed by
# the setup cell.
OUT_BUILDING = PROCESSED_DIR / "seongsu_grid_250m_building_density_only.csv"

export_frame = out[SAVE_COLS]
export_frame.to_csv(OUT_BUILDING, index=False, encoding="utf-8-sig")
print("✅ saved:", OUT_BUILDING, "| rows:", len(out), "| cols:", len(SAVE_COLS))

# Preview the first rows of what was saved.
display(export_frame.head(10))


clipped buildings: 571
use_group counts:
 use_group
residential    479
commercial      56
etc             36
Name: count, dtype: int64
✅ saved: ..\data\processed\seongsu_grid_250m_building_density_only.csv | rows: 109 | cols: 7


Unnamed: 0,grid_id,commercial_cnt,residential_cnt,commercial_density_raw,residential_density_raw,commercial_density,residential_density
0,0,0.0,0.0,0.0,0.0,0.333333,0.25463
1,1,0.0,0.0,0.0,0.0,0.333333,0.25463
2,2,0.0,0.0,0.0,0.0,0.333333,0.25463
3,3,0.0,0.0,0.0,0.0,0.333333,0.25463
4,4,0.0,0.0,0.0,0.0,0.333333,0.25463
5,5,0.0,0.0,0.0,0.0,0.333333,0.25463
6,6,0.0,0.0,0.0,0.0,0.333333,0.25463
7,7,0.0,0.0,0.0,0.0,0.333333,0.25463
8,8,0.0,0.0,0.0,0.0,0.333333,0.25463
9,9,0.0,14.0,0.0,224.0,0.333333,0.925926
