## 설정 및 공통 상수 셀

In [10]:
import pandas as pd
import requests
import time
import os
from dotenv import load_dotenv

# Load environment variables through python-dotenv before reading the key.
load_dotenv()

# API key read from the environment; None when the variable is not set.
KAKAO_REST_API_KEY = os.environ.get("KAKAO_REST_API_KEY")

# Header dict sent with every request made in this notebook.
HEADERS = dict(Authorization=f"KakaoAK {KAKAO_REST_API_KEY}")

# Kakao Local REST endpoints used by the helper functions below.
ADDRESS_URL = "https://dapi.kakao.com/v2/local/search/address.json"
CATEGORY_URL = "https://dapi.kakao.com/v2/local/search/category.json"
COORD2REGION_URL = "https://dapi.kakao.com/v2/local/geo/coord2regioncode.json"

# Category group code -> Korean display name. Insertion order is the order
# in which categories are queried and written to the output file.
CATEGORIES = dict(
    CS2="편의점",
    MT1="대형마트",
    PK6="주차장",
    BK9="은행",
    FD6="음식점",
    CE7="카페",
    HP8="병원",
    PM9="약국",
)

# Radius value passed to the category search.
RADIUS = 800
# CSV file that the per-dong category counts are appended to.
OUTPUT_PATH = "../output/seoul_legal_dong_category_count.csv"

## Sanity check: confirm the key was loaded WITHOUT echoing the secret itself,
## so the credential never ends up in the saved notebook output.
print("KAKAO_REST_API_KEY loaded:", bool(KAKAO_REST_API_KEY))

<redacted: API key removed from saved output>


## 법정동 CSV 로드 & 서울 필터링

In [11]:
# Load the legal-dong table in this cell so it does not rely on a later cell
# having been executed first (dong_df was otherwise undefined at this point).
dong_df = pd.read_csv("../raw/legal_dong.csv")

# Keep Seoul rows that have a district name and a non-empty dong name.
seoul_dong_df = dong_df[
    (dong_df["시도명"] == "서울특별시") &
    (dong_df["시군구명"].notna()) &
    (dong_df["읍면동명"].notna()) &
    (dong_df["읍면동명"] != "")
].copy()

print(seoul_dong_df.shape)


(1086, 9)


## 좌표로 행정구역정보 변환
- [Kakao developers | 문서 > 로컬 > REST API > 좌표로 행정구역정보 변환](https://developers.kakao.com/docs/latest/ko/local/dev-guide)
- 좌표계에 대한 좌표값을 받아 해당 좌표에 부합하는 **행정동, 법정동**을 얻는 API
    - 행정동 (region_type == "H")
    - 법정동 (region_type == "B")

In [12]:
def coord_to_bcode(x, y, sleep=0.05, timeout=10):
    """Return the legal-dong code for a coordinate, or None if unavailable.

    Queries COORD2REGION_URL and returns the ``code`` field of the first
    document whose ``region_type`` is ``"B"``.

    Args:
        x: Value sent as the ``x`` query parameter.
        y: Value sent as the ``y`` query parameter.
        sleep: Seconds to pause after the request, used as a throttle.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The ``code`` value of the matching document, or None when the
        response status is not 200 or no ``"B"`` document is present.

    Raises:
        requests.exceptions.RequestException: On connection failure or when
            the request exceeds ``timeout``.
    """
    try:
        res = requests.get(
            COORD2REGION_URL,
            headers=HEADERS,
            params={"x": x, "y": y, "input_coord": "WGS84"},
            # Without a timeout a stalled connection would block forever.
            timeout=timeout,
        )

        if res.status_code != 200:
            return None

        for doc in res.json().get("documents", []):
            if doc.get("region_type") == "B":
                return doc.get("code")

        return None
    finally:
        # Throttle on every exit path, including error responses, so a
        # failing API is not hit again immediately by the caller's loop.
        time.sleep(sleep)


In [13]:
def fetch_places(category, x, y, radius, sleep=0.05, timeout=10):
    """Collect every page of category-search results around a coordinate.

    Requests CATEGORY_URL page by page (15 documents per page) until the
    response's ``meta.is_end`` flag is true.

    Args:
        category: Value sent as ``category_group_code``.
        x: Value sent as the ``x`` query parameter.
        y: Value sent as the ``y`` query parameter.
        radius: Value sent as the ``radius`` query parameter.
        sleep: Seconds to pause between consecutive page requests.
        timeout: Seconds to wait for the server on each request.

    Returns:
        A list with the ``documents`` entries of all fetched pages.

    Raises:
        requests.exceptions.HTTPError: When a page returns an error status.
        requests.exceptions.RequestException: On connection failure or when
            a request exceeds ``timeout``.
    """
    rows = []
    page = 1

    while True:
        res = requests.get(
            CATEGORY_URL,
            headers=HEADERS,
            params={
                "category_group_code": category,
                "x": x,
                "y": y,
                "radius": radius,
                "page": page,
                "size": 15,
            },
            timeout=timeout,
        )
        # Fail with a clear HTTP error instead of a KeyError on the missing
        # "meta" key when the API answers with an error body.
        res.raise_for_status()
        payload = res.json()

        rows.extend(payload.get("documents", []))

        # Treat a missing flag as the end so the loop can never spin forever.
        if payload.get("meta", {}).get("is_end", True):
            break

        page += 1
        time.sleep(sleep)

    return rows


In [14]:
def dong_to_coord(address, sleep=0.1, timeout=10):
    """Geocode an address string and return its coordinates.

    Queries ADDRESS_URL with ``address`` and uses the first returned
    document.

    Args:
        address: Address text sent as the ``query`` parameter.
        sleep: Seconds to pause after the request, used as a throttle.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A ``(x, y)`` tuple of floats taken from the first document, or
        ``(None, None)`` when the response status is not 200 or no document
        was returned.

    Raises:
        requests.exceptions.RequestException: On connection failure or when
            the request exceeds ``timeout``.
    """
    try:
        res = requests.get(
            ADDRESS_URL,
            headers=HEADERS,
            params={"query": address},
            # Without a timeout a stalled connection would block forever.
            timeout=timeout,
        )

        if res.status_code != 200:
            return None, None

        docs = res.json().get("documents", [])
        if not docs:
            return None, None

        first = docs[0]
        return float(first["x"]), float(first["y"])
    finally:
        # Throttle on every exit path; previously the pause was skipped for
        # error responses and empty results.
        time.sleep(sleep)


## 정상적으로 실행되는지 테스트

In [15]:
# Smoke test: geocode the first Seoul dong and run one category search on it.
test_row = seoul_dong_df.iloc[0]
address = f"서울특별시 {test_row['시군구명']} {test_row['읍면동명']}"

x, y = dong_to_coord(address)
print("TEST address:", address)
print("TEST x,y:", x, y)

if x is None:
    # dong_to_coord returns (None, None) on failure; searching with missing
    # coordinates would only produce a confusing downstream error.
    print("TEST geocoding failed; skipping category search")
else:
    # Use the shared RADIUS constant so the test matches the real crawl.
    places = fetch_places("CS2", x, y, RADIUS)
    print("CS2 places count:", len(places))

    if places:
        print(places[0])


TEST address: 서울특별시 종로구 청운동
TEST x,y: 126.969329763593 37.5891974378627
CS2 places count: 7
{'address_name': '서울 종로구 청운동 63', 'category_group_code': 'CS2', 'category_group_name': '편의점', 'category_name': '가정,생활 > 편의점 > 세븐일레븐', 'distance': '318', 'id': '1191544417', 'phone': '1577-0711', 'place_name': '세븐일레븐 종로청운점', 'place_url': 'http://place.map.kakao.com/1191544417', 'road_address_name': '서울 종로구 자하문로 115', 'x': '126.969666067717', 'y': '37.5863395760336'}


In [18]:
# Main crawl: for every Seoul legal dong, count the places of each category
# whose coordinates resolve back to that same dong, appending the counts to
# OUTPUT_PATH. The run is resumable: dongs already present in the output file
# are skipped.
dong_df = pd.read_csv("../raw/legal_dong.csv")

# Keep Seoul rows that have a district name and a non-empty dong name.
seoul_dong_df = dong_df[
    (dong_df["시도명"] == "서울특별시") &
    (dong_df["시군구명"].notna()) &
    (dong_df["읍면동명"].notna()) &
    (dong_df["읍면동명"] != "")
].copy()

done_bcodes = set()

if os.path.exists(OUTPUT_PATH) and os.path.getsize(OUTPUT_PATH) > 0:
    done_df = pd.read_csv(OUTPUT_PATH)
    done_bcodes = set(done_df["법정동코드"].astype(str))
    print("이미 처리된 법정동 수:", len(done_bcodes))
else:
    print("기존 결과 없음 (처음 실행)")

    # Write the CSV header whenever the file is missing OR empty. Checking
    # only for existence left an empty file without a header, so the next
    # resume read the first data row as column names.
    output_dir = os.path.dirname(OUTPUT_PATH)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)
    pd.DataFrame(
        columns=["법정동코드", "category_group_code", "count"]
    ).to_csv(OUTPUT_PATH, index=False)

for _, row in seoul_dong_df.iterrows():
    bcode = str(row["법정동코드"])

    if bcode in done_bcodes:
        continue

    address = f"서울특별시 {row['시군구명']} {row['읍면동명']}"

    x, y = dong_to_coord(address)
    if x is None:
        # Geocoding failed; nothing is written, so a later run retries it.
        continue

    rows_to_save = []

    for category in CATEGORIES:
        places = fetch_places(category, x, y, RADIUS)

        # Count only the places whose coordinates map back to this dong.
        # NOTE(review): coord_to_bcode returns None on a failed lookup, which
        # silently lowers the count - confirm this is acceptable.
        # NOTE(review): the search API presumably caps the number of
        # retrievable results per query; verify counts in dense areas.
        count = sum(
            coord_to_bcode(p["x"], p["y"]) == bcode
            for p in places
        )

        rows_to_save.append({
            "법정동코드": bcode,
            "category_group_code": category,
            "count": count
        })

    # Append all categories of one dong in a single write so the resume
    # check never sees a partially written dong from a completed iteration.
    pd.DataFrame(rows_to_save).to_csv(
        OUTPUT_PATH,
        mode="a",
        header=False,
        index=False
    )

    time.sleep(0.3)


이미 처리된 법정동 수: 21
