In [28]:
import numpy as np
import pandas as pd
import os

# Locate the processed-data directory two levels above the working directory
# and load the per-grid feature table.
current_dir = os.getcwd()
parent_dir = os.path.dirname(current_dir)
project_root = os.path.dirname(parent_dir)
DATA_DIR = os.path.join(project_root, 'data', 'processed')
grid_csv_path = os.path.join(DATA_DIR, 'grid_features_with_prediction_all.csv')
df_grids = pd.read_csv(grid_csv_path)

# Efficiency is supply divided by demand; rows whose demand is exactly 0 get 0
# instead of the result of the division.
has_demand = df_grids['demand_score'] != 0
df_grids['efficiency'] = np.where(
    has_demand, df_grids['supply_score'] / df_grids['demand_score'], 0
)

# Total demand over all grids; used as the denominator of the coverage rate.
total_demand = df_grids['demand_score'].sum()

# Vectorized haversine function
def haversine_vec(lat1, lon1, lat2_series, lon2_series, earth_radius_km=6371.0):
    """Return great-circle distances from one point to one or many points.

    Args:
        lat1: Latitude of the reference point, in degrees.
        lon1: Longitude of the reference point, in degrees.
        lat2_series: Latitude(s) of the target point(s), in degrees. May be a
            scalar, a NumPy array or a pandas Series.
        lon2_series: Longitude(s) of the target point(s), in degrees, with the
            same shape as ``lat2_series``.
        earth_radius_km: Sphere radius used to scale the central angle. The
            result is expressed in the same unit as this radius (kilometres
            for the default of 6371).

    Returns:
        The distance(s) with the same container type and shape as the target
        coordinates (a pandas Series keeps its index).
    """
    lat1_rad, lon1_rad = np.radians(lat1), np.radians(lon1)
    lat2_rad, lon2_rad = np.radians(lat2_series), np.radians(lon2_series)
    dlat = lat2_rad - lat1_rad
    dlon = lon2_rad - lon1_rad
    a = np.sin(dlat / 2) ** 2 + np.cos(lat1_rad) * np.cos(lat2_rad) * np.sin(dlon / 2) ** 2
    # Floating-point rounding can push `a` marginally outside [0, 1] (for
    # example for near-antipodal points), which would make sqrt/arcsin return
    # NaN. Clamp with ufuncs so Series inputs stay Series and NaNs propagate.
    a = np.minimum(np.maximum(a, 0.0), 1.0)
    return earth_radius_km * 2 * np.arcsin(np.sqrt(a))

R = 1.0  # km

# For every grid, add up the demand of all grids whose centre lies within R of
# this grid's centre (the grid itself is included, since its distance is 0).
covered_demands = [
    df_grids.loc[
        haversine_vec(lat, lon, df_grids['center_lat'], df_grids['center_lon']) <= R,
        'demand_score',
    ].sum()
    for lat, lon in zip(df_grids['center_lat'], df_grids['center_lon'])
]

# Express each covered demand as a percentage of the total demand; when the
# total is not positive the rate is reported as 0.
coverage_rates = [
    covered / total_demand * 100 if total_demand > 0 else 0
    for covered in covered_demands
]

df_grids['covered_demand'] = covered_demands
df_grids['coverage_rate'] = coverage_rates

# Entropy-based weights for the four indicator columns
X = df_grids[['demand_score', 'supply_score', 'efficiency', 'coverage_rate']].values
n_rows, n_cols = X.shape

# Column-wise proportions. A column whose sum is zero or non-finite has no
# well-defined proportions; divide it by 1 here and neutralise it below, so it
# cannot turn into an all-NaN column that nansum would silently read as
# "entropy 0" (which would hand that column the largest weight).
col_sums = X.sum(axis=0, keepdims=True)
usable = np.isfinite(col_sums) & (col_sums != 0)
X = X / np.where(usable, col_sums, 1.0)

# Normalised entropy per column. The 1e-12 offset keeps log() finite for zero
# proportions. With fewer than two rows log(n_rows) is not a usable divisor,
# so every column is treated as carrying no information (entropy 1).
if n_rows > 1:
    E = -np.nansum(X * np.log(X + 1e-12), axis=0) / np.log(n_rows)
else:
    E = np.ones(n_cols)
E = np.where(usable.ravel(), E, 1.0)

# Divergence (1 - entropy) normalised to sum to 1. If no column carries any
# information the divergences cannot be normalised; fall back to equal weights
# rather than producing NaNs.
d = 1 - E
d_total = d.sum()
if np.isfinite(d_total) and d_total != 0:
    weights = d / d_total
else:
    weights = np.full(n_cols, 1.0 / n_cols)

# Priority score: entropy-weighted sum of the four indicator columns, added up
# left to right in the same column order the weights were derived in.
# NOTE(review): the weights multiply the raw column values, not the
# column-normalised proportions they were derived from, so indicators on a
# larger numeric scale contribute more to the score — confirm this is intended.
indicator_columns = ['demand_score', 'supply_score', 'efficiency', 'coverage_rate']
weighted_terms = [df_grids[col] * w for col, w in zip(indicator_columns, weights)]
df_grids['priority_score'] = sum(weighted_terms[1:], weighted_terms[0])

# Build the ranking table: keep only the reporting columns, order grids by
# priority score (highest first) and number them starting from 1.
ranking_columns = [
    'grid_id', 'demand_score', 'supply_score', 'efficiency',
    'covered_demand', 'coverage_rate', 'priority_score',
]
grid_ranking = df_grids[ranking_columns].sort_values('priority_score', ascending=False)
grid_ranking = grid_ranking.reset_index(drop=True)
grid_ranking['rank'] = range(1, len(grid_ranking) + 1)

# Report the ten highest-priority grids and the weights that produced them.
print("격자별 우선순위 랭킹 상위 10개:")
print(grid_ranking.head(10))

print("\n엔트로피 기반 가중치:", weights.tolist())


격자별 우선순위 랭킹 상위 10개:
        grid_id  demand_score  supply_score  efficiency  covered_demand  \
0  GRID_021_058        4838.0     188.96379    0.039058         24728.0   
1  GRID_035_049        4967.0      70.84313    0.014263         18525.0   
2  GRID_020_055        4131.5     188.96379    0.045737         11820.0   
3  GRID_023_054        4109.5      87.41647    0.021272          6754.5   
4  GRID_035_023        3844.5     116.86713    0.030399          9621.5   
5  GRID_023_063        3625.0     244.96785    0.067577         18520.5   
6  GRID_026_036        3723.0     130.19135    0.034969         12846.0   
7  GRID_038_051        3630.5     175.63305    0.048377         19122.5   
8  GRID_036_052        3579.5     175.63305    0.049066         22334.5   
9  GRID_033_039        3570.5     161.70832    0.045290         17615.5   

   coverage_rate  priority_score  rank  
0       3.455260      805.112668     1  
1       2.588511      797.457051     2  
2       1.651617      693.60600