In [None]:
import os
import pandas as pd
import numpy as np
import requests
from dotenv import load_dotenv
from sklearn.neighbors import BallTree

# Populate the process environment via python-dotenv before reading the key.
load_dotenv()
KAKAO_API_KEY = os.getenv("KAKAO_API_KEY")
if not KAKAO_API_KEY:
    # Without a key the header below becomes "KakaoAK None"; surface that here
    # instead of letting later requests fail without any visible reason.
    print("⚠️ KAKAO_API_KEY 환경 변수가 설정되지 않았습니다. 카카오 API 호출이 인증 오류로 실패합니다.")
# Authorization header sent with every Kakao API request in this notebook.
HEADERS = {"Authorization": f"KakaoAK {KAKAO_API_KEY}"}

In [None]:
# Column layout shared by both sources so they can be stacked row-wise.
common_cols = ["id", "name", "category", "x", "y"]

# POI file: harmonise column names, then assign synthetic ids
# of the form kko_0000, kko_0001, ... in row order.
poi = pd.read_csv("result/카카오맵_POI.csv").rename(
    columns={"place_name": "name", "group_name": "category"}
)
poi = poi.assign(id=["kko_{:04d}".format(n) for n in range(len(poi))])[common_cols]

# Bus stops: the source headers are Korean (경도 = longitude -> x, 위도 = latitude -> y);
# every stop gets the single fixed category "버스정류장".
bus = (
    pd.read_csv("data/세종특별자치시_정류장정보_20250418.csv", encoding="cp949")
    .rename(
        columns={
            "정류장 고유번호": "id",
            "정류장 명": "name",
            "경도": "x",
            "위도": "y",
        }
    )
    .assign(category="버스정류장")
)[common_cols]

# Stack both tables under a fresh 0..n-1 index, report, and persist.
combined = pd.concat([poi, bus], ignore_index=True)
print("병합 완료:", combined.shape)
print("category 종류:", combined["category"].value_counts())
combined.to_csv("output/POI_버스추가.csv", index=False, encoding="utf-8")

In [None]:
# Load the public-data-portal POI table (cp949-encoded) and discard the
# "adress" column when it is present; errors="ignore" makes the drop a no-op otherwise.
# NOTE(review): "adress" looks like a misspelling of "address" — confirm it
# matches the actual CSV header, otherwise nothing is dropped.
df = (
    pd.read_csv("data/공공데이터포탈_세종특별자치시_POI.csv", encoding="cp949")
    .drop(columns=["adress"], errors="ignore")
)

def get_coords(query):
    """Look up coordinates for a place name via Kakao keyword search.

    Args:
        query: Free-text search keyword (here, a POI name).

    Returns:
        A tuple ``(x, y)`` taken from the first document of the response,
        exactly as the API returned them (no type conversion), or
        ``(None, None)`` when the request fails, the response cannot be
        parsed, or no document matched.
    """
    try:
        res = requests.get(
            "https://dapi.kakao.com/v2/local/search/keyword.json",
            # Let requests URL-encode the keyword; names may contain spaces,
            # '&', '#' or non-ASCII text that would corrupt a hand-built URL.
            params={"query": query},
            headers=HEADERS,
            # Never let one stalled connection hang the whole notebook run.
            timeout=10,
        )
        res.raise_for_status()
        documents = res.json()["documents"]
        if documents:
            first = documents[0]
            return first["x"], first["y"]
    except (requests.RequestException, ValueError, LookupError, TypeError):
        # Best-effort lookup: network/HTTP errors, invalid JSON, or an
        # unexpected payload shape all mean "no coordinates available".
        return None, None
    return None, None

# Rows lacking either coordinate are the only ones that need geocoding.
needs_coords = df["x"].isna() | df["y"].isna()
missing = df[needs_coords]
print(f"❗ 좌표 결측치 있는 항목 수: {len(missing)}")

if needs_coords.any():
    # Geocode by name for the incomplete rows only; complete rows are left
    # untouched. This also avoids unpacking an empty zip() when df has no rows.
    fetched = pd.DataFrame(
        [get_coords(name) for name in missing["name"]],
        index=missing.index,
        columns=["x", "y"],
    )
    # get_coords returns raw API values (or None); coerce to numbers so the
    # coordinate columns stay numeric, with failed lookups remaining NaN.
    df.loc[needs_coords, ["x", "y"]] = fetched.apply(pd.to_numeric, errors="coerce")

df.to_csv("output/POI_좌표보완.csv", index=False, encoding="utf-8")

In [None]:
# Reload the coordinate-completed table written by the previous step.
df = pd.read_csv("output/POI_좌표보완.csv")

raw_categories = df["category"]
print("📋 category 원본 종류:", raw_categories.nunique())
print(raw_categories.value_counts())

# Second-level -> first-level category. Currently an identity mapping, so it
# effectively acts as an allow-list: any category not listed maps to NaN.
SECOND_TO_FIRST = {
    "음식점": "음식점",
    "카페": "카페",
    "편의점": "편의점",
    "병원": "병원",
    "공공기관": "공공기관",
    "문화시설": "문화시설",
    "중개업소": "중개업소",
    "숙박": "숙박",
    "생활서비스": "생활서비스",
}

# First-level category -> short code.
# NOTE(review): the codes look like Kakao category group codes, and
# "생활서비스" shares "CT1" with "문화시설" — confirm this is intentional.
FIRST_TO_CODE = {
    "음식점": "FD6",
    "카페": "CE7",
    "편의점": "CS2",
    "병원": "HP8",
    "공공기관": "PO3",
    "문화시설": "CT1",
    "중개업소": "AG2",
    "숙박": "AD5",
    "생활서비스": "CT1",
}

# Derive the three category columns in order; each later column is computed
# from the one assigned just before it.
df = df.assign(
    category_2nd=raw_categories,
    category_1st=lambda frame: frame["category_2nd"].map(SECOND_TO_FIRST),
    category_code=lambda frame: frame["category_1st"].map(FIRST_TO_CODE),
)

print("카테고리 정제 완료")
print("1차 카테고리 종류:", df["category_1st"].value_counts())


In [None]:
# Remove same-name POIs that lie within a small radius of an earlier row.
#
# Drop rows without coordinates and renumber 0..n-1. The renumbering matters:
# BallTree reports neighbours by *position* in `coords`, so the frame's index
# labels must equal positions for the lookups and the final drop to hit the
# right rows.
df = df.dropna(subset=["x", "y"]).reset_index(drop=True)

# The haversine metric expects (latitude, longitude) in radians; y is latitude
# and x is longitude in this dataset.
coords = np.radians(df[["y", "x"]].astype(float).to_numpy())

# 0.05 km (50 m) divided by 6371 (Earth's mean radius in km) gives the search
# radius as an angle in radians, which is the unit the haversine metric uses.
# The name is kept for compatibility even though the value is not in km.
radius_km = 0.05 / 6371

# Only names occurring more than once can have duplicates at all.
name_counts = df["name"].value_counts()
multi_names = name_counts[name_counts > 1].index
dup_idx = set()

names = df["name"].to_numpy()
candidate_pos = np.flatnonzero(df["name"].isin(multi_names).to_numpy())

# Skip tree construction when nothing can be a duplicate (this also covers an
# empty frame, on which BallTree would raise).
if candidate_pos.size:
    tree = BallTree(coords, metric="haversine")
    # One batched radius query for all candidate rows.
    neighbours = tree.query_radius(coords[candidate_pos], r=radius_km)
    for i, nearby in zip(candidate_pos, neighbours):
        for j in nearby:
            # Keep the earliest row of each cluster: only later rows (j > i)
            # with the same name are marked for removal.
            if j > i and names[j] == names[i]:
                dup_idx.add(int(j))

df = df.drop(index=sorted(dup_idx)).reset_index(drop=True)
print(f"중복 제거 완료 - 남은 행 수: {len(df)}")

In [None]:
# Cell 6: save the final result.
# A single path variable keeps the written file and the confirmation message
# in sync (the message previously named a different file than the one written).
FINAL_PATH = "result/세종특별자치시_POI_최종(1).csv"
df.to_csv(FINAL_PATH, index=False, encoding="utf-8")
print(f"저장 완료: {FINAL_PATH}")