In [49]:
# 📌 외부 py 파일 수정 시 자동으로 재로드되도록 설정
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [50]:
# ================================================
# 📦 공통 패키지 import
# ================================================
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm.notebook import tqdm
import os
import sys

# src 경로 추가
PROJECT_ROOT = os.path.abspath(os.path.join(".."))
SRC_DIR = os.path.join(PROJECT_ROOT, "src")
if SRC_DIR not in sys.path:
    sys.path.append(SRC_DIR)

# seaborn 스타일 설정
sns.set(style="whitegrid")

# ================================================
# ⚙️ 경로 및 상수 설정
# ================================================
DATA_DIR = "../data/processed"
OUTPUT_DIR = "../data/modeling"

os.makedirs(OUTPUT_DIR, exist_ok=True)

# KMeans 설정
KMEANS_MODE = "manual"
KMEANS_MANUAL_K = 5

# XGBoost 설정
XGB_N_ESTIMATORS = 100

# MCLP 설정
COVERAGE_RADIUS = 0.02
FACILITY_LIMIT = 30

# ================================================
# 🧪 평가 함수 import
# ================================================
from evaluation.baseline_evaluator import (
    evaluate_existing_stations,
    evaluate_random_installation,
    evaluate_cluster_centers,
    evaluate_mclp_result
)

# ================================================
# 🧠 모델 함수 import
# ================================================
from modeling.kmeans_model import run_kmeans

# K means

In [51]:
# 데이터 로드
grid = pd.read_csv(f"{DATA_DIR}/grid_system_processed.csv")

# 수요가 가장 높은 클러스터만 추출
grid = run_kmeans(
    grid,
    mode=KMEANS_MODE,
    manual_k=KMEANS_MANUAL_K,
    return_top_cluster_only=True
)

[MANUAL MODE] 수동 설정 k = 5

[Cluster별 평균 수요]
cluster
2    3099.808511
3    1742.885714
1     959.328947
4     443.716312
0      12.835426
Name: demand_score, dtype: float64

[필터링] 수요가 가장 높은 클러스터 (cluster=2)만 반환됨.


In [52]:
# grid_features.csv 로드
features_all = pd.read_csv(f"{DATA_DIR}/grid_features.csv")

# KMeans로 추출한 수요 밀집 클러스터와 병합 (grid_id 기준)
features = features_all.merge(grid[['grid_id', 'cluster']], on='grid_id', how='inner')

# 확인
print(f"전체 feature 수: {len(features_all)}")
print(f"수요 밀집 클러스터 feature 수: {len(features)}")
print(f"사용 가능한 feature 컬럼: {features.columns.tolist()}")

전체 feature 수: 6030
수요 밀집 클러스터 feature 수: 47
사용 가능한 feature 컬럼: ['grid_id', 'center_lat', 'center_lon', 'demand_score', 'supply_score', 'commercial_count', 'station_count', 'supply_demand_ratio', 'population_density', 'accessibility_score', 'transport_score', 'cluster']


# XGBoost

In [53]:
from modeling.xgboost_model import train_and_predict

selected_features = ['supply_score', 'station_count', 'commercial_count', 'cluster']

features, metrics = train_and_predict(
    features,
    features=selected_features,
    n_estimators=XGB_N_ESTIMATORS
)

features.to_csv(f"{DATA_DIR}/grid_features_with_prediction.csv", index=False)
print("XGBoost 예측 성능:", metrics)

XGBoost 성능:
MAE: 680.78
RMSE: 883.08
R2: -3.2985
XGBoost 예측 성능: {'MAE': 680.78, 'RMSE': 883.08, 'R2': -3.2985}


# MCLP

In [55]:
from modeling.mclp_model import solve_mclp

# MCLP 실행
features = solve_mclp(
    df=features,
    coverage_radius=COVERAGE_RADIUS,
    facility_limit=FACILITY_LIMIT,
    demand_column='predicted_demand_score'
)

# 결과 저장
features.to_csv(f"{OUTPUT_DIR}/mclp_selected_sites.csv", index=False)

# 설치 위치 확인
selected = features[features['selected'] == 1]
print(f"설치 대상지 수: {len(selected)}")
print(f"커버 수요 총합: {selected['predicted_demand_score'].sum():,.2f}")

📍 커버리지 행렬 생성 중: 100%|██████████| 47/47 [00:00<00:00, 1117.82it/s]
🧩 제약조건 생성 중: 100%|██████████| 47/47 [00:00<00:00, 62324.47it/s]

Welcome to the CBC MILP Solver 
Version: 2.10.3 
Build Date: Dec 15 2019 

command line - /opt/anaconda3/envs/25bigdata/lib/python3.10/site-packages/pulp/apis/../solverdir/cbc/osx/i64/cbc /var/folders/g0/wf5k560d45g5_rq_z1jbdpq40000gn/T/dea3e4e95ef94072a7d0e797bdf44042-pulp.mps -max -timeMode elapsed -branch -printingOptions all -solution /var/folders/g0/wf5k560d45g5_rq_z1jbdpq40000gn/T/dea3e4e95ef94072a7d0e797bdf44042-pulp.sol (default strategy 1)
At line 2 NAME          MODEL
At line 3 ROWS
At line 53 COLUMNS
At line 644 RHS
At line 693 BOUNDS
At line 788 ENDATA
Problem MODEL has 48 rows, 94 columns and 355 elements
Coin0008I MODEL read with 0 errors
Option for timeMode changed from cpu to elapsed
Continuous objective value is 146079 - 0.00 seconds
Cgl0004I processed model has 40 rows, 69 columns (69 integer (61 of which binary)) and 206 elements
Cbc0038I Initial state - 0 integers unsatisfied sum - 0
Cbc0038I Solution found of -146079
Cbc0038I Cleaned solution of -146079
Cbc0038I Be




# 평가

In [56]:
print("'selected' in features.columns:", 'selected' in features.columns)
print(features['selected'].value_counts() if 'selected' in features.columns else "selected 열 없음")

'selected' in features.columns: True
selected
0    31
1    16
Name: count, dtype: int64


In [57]:
print("'selected' in features.columns:", 'selected' in features.columns)

'selected' in features.columns: True


In [None]:
features = pd.read_csv(f"{OUTPUT_DIR}/mclp_selected_sites.csv")

# 기존 충전소 데이터 로드
stations = pd.read_csv(f"../data/raw/한국환경공단_전기차 충전소 위치 및 운영정보(충전소 ID 포함)_20230531.csv", encoding='cp949')

# 평가 실행
baseline1 = evaluate_existing_stations(features, stations)
baseline2 = evaluate_random_installation(features, n=FACILITY_LIMIT)
baseline3 = evaluate_cluster_centers(features)
baseline4 = evaluate_mclp_result(features, facility_limit=FACILITY_LIMIT)

  stations = pd.read_csv(f"../data/raw/한국환경공단_전기차 충전소 위치 및 운영정보(충전소 ID 포함)_20230531.csv", encoding='cp949')


In [None]:
results = pd.DataFrame([
    {'baseline': '① 기존 충전소', **baseline1},
    {'baseline': '② 랜덤 설치', **baseline2},
    {'baseline': '③ 클러스터 중심', **baseline3},
    {'baseline': '④ MCLP 최적화', **baseline4}
])

from IPython.display import display
display(results)

Unnamed: 0,baseline,coverage,coverage_rate,covered_grids,dsr,efficiency,selected,total_demand
0,① 기존 충전소,146079.065,100.0,47.0,,,,
1,② 랜덤 설치,92650.9073,63.425178,30.0,,,,
2,③ 클러스터 중심,4109.501,2.813203,1.0,,,,
3,④ MCLP 최적화,50889.2108,34.836758,,1696.307027,3180.575675,16.0,146079.065
