# 04. 지역 비교 분석 (Regional Comparison)

구별/역별 경쟁도, 포화도, 수익 잠재력을 비교 분석합니다.

In [None]:
import sys
sys.path.insert(0, '..')

import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import folium
from folium.plugins import HeatMap
from models.database import session_scope
from models.schema import Station, Listing, DailyStat

In [None]:
# Load the Station, Listing and DailyStat tables into DataFrames. Each ORM
# query is converted to its SELECT statement and read through the session's
# bind while the session scope is still open.
with session_scope() as session:
    stations, listings, daily_stats = (
        pd.read_sql(session.query(model).statement, session.bind)
        for model in (Station, Listing, DailyStat)
    )

daily_stats['date'] = pd.to_datetime(daily_stats['date'])

# Keep only the rows whose room_type is null (presumably the rows that
# aggregate all room types -- confirm against the DailyStat schema).
is_total_row = daily_stats['room_type'].isna()
total_stats = daily_stats.loc[is_total_row].copy()

# Per-station aggregates: mean booking rate, mean daily price, summed
# estimated revenue and mean listing count.
station_metrics = total_stats.groupby('station_id').agg(
    avg_booking_rate=pd.NamedAgg(column='booking_rate', aggfunc='mean'),
    avg_price=pd.NamedAgg(column='avg_daily_price', aggfunc='mean'),
    total_revenue=pd.NamedAgg(column='estimated_revenue', aggfunc='sum'),
    avg_listings=pd.NamedAgg(column='total_listings', aggfunc='mean'),
)
station_metrics = station_metrics.reset_index()

# Attach station attributes (name, line, district, coordinates) by station id.
station_columns = ['id', 'name', 'line', 'district', 'latitude', 'longitude']
station_summary = station_metrics.merge(
    stations[station_columns],
    left_on='station_id',
    right_on='id',
)

print(f'분석 역 수: {len(station_summary)}')

## 1. 구별 예약률 비교

In [None]:
# Roll the per-station summary up to district level. The booking rate and
# price are means of the per-station means; revenue is summed and the number
# of stations per district is counted.
district_agg_spec = {
    'avg_booking_rate': ('avg_booking_rate', 'mean'),
    'avg_price': ('avg_price', 'mean'),
    'total_revenue': ('total_revenue', 'sum'),
    'station_count': ('station_id', 'count'),
}
district_stats = station_summary.groupby('district').agg(**district_agg_spec)
district_stats = district_stats.reset_index()
district_stats = district_stats.sort_values('avg_booking_rate', ascending=False)

# Bar chart of the 15 districts with the highest mean booking rate,
# coloured by mean price.
top_districts = district_stats.head(15)
fig = px.bar(
    top_districts,
    x='district',
    y='avg_booking_rate',
    color='avg_price',
    color_continuous_scale='Blues',
    title='구별 평균 예약률 (상위 15개)',
    labels={'district': '구', 'avg_booking_rate': '평균 예약률'},
)
fig.update_yaxes(tickformat='.0%')  # show the booking rate axis as percentages
fig.show()

## 2. 호선별 비교

In [None]:
# Roll the per-station summary up to subway-line level: mean of per-station
# booking rates and prices, summed revenue, and the station count per line.
line_agg_spec = {
    'avg_booking_rate': ('avg_booking_rate', 'mean'),
    'avg_price': ('avg_price', 'mean'),
    'total_revenue': ('total_revenue', 'sum'),
    'station_count': ('station_id', 'count'),
}
line_stats = station_summary.groupby('line').agg(**line_agg_spec)
line_stats = line_stats.reset_index()
line_stats = line_stats.sort_values('avg_booking_rate', ascending=False)

# Bubble chart: booking rate against price, one bubble per line, sized by
# the number of stations on that line.
line_axis_labels = {
    'avg_booking_rate': '평균 예약률',
    'avg_price': '평균 일일 가격 (KRW)',
    'station_count': '역 수',
}
fig = px.scatter(
    line_stats,
    x='avg_booking_rate',
    y='avg_price',
    size='station_count',
    color='line',
    hover_name='line',
    title='호선별 예약률 vs 평균 가격',
    labels=line_axis_labels,
)
fig.update_xaxes(tickformat='.0%')  # show the booking rate axis as percentages
fig.show()

## 3. 경쟁도 분석 - 역별 숙소 수 포화도

In [None]:
# Listing count vs. booking rate per station, used to judge saturation.
# Bubble size is the station's total revenue; colour is its district.
saturation_labels = {
    'avg_listings': '평균 숙소 수',
    'avg_booking_rate': '평균 예약률',
    'total_revenue': '총 수익',
}
fig = px.scatter(
    station_summary,
    x='avg_listings',
    y='avg_booking_rate',
    size='total_revenue',
    color='district',
    hover_name='name',
    title='역별 숙소 수 vs 예약률 (버블: 총 수익)',
    labels=saturation_labels,
)
fig.update_yaxes(tickformat='.0%')  # show the booking rate axis as percentages
fig.show()

## 4. 수익 잠재력 지도 (folium)

In [None]:
# Revenue-potential map: a heat layer weighted by normalized total revenue,
# plus star markers for the five highest-revenue stations.
m = folium.Map(location=[37.5665, 126.9780], zoom_start=12)

# Keep only stations with usable coordinates. A plain truthiness test is not
# enough here: missing values in a DataFrame are NaN, and NaN is truthy, so
# it would slip through and folium raises on NaN locations. A coordinate of
# 0 is also treated as missing.
coords = station_summary[['latitude', 'longitude']]
has_coords = coords.notna().all(axis=1) & coords.ne(0).all(axis=1)
located = station_summary[has_coords]

# Normalize revenue by the overall maximum so heat weights fall in (.., 1].
# The comparison is False when the maximum is 0 or NaN (e.g. no rows), which
# skips the layer and avoids dividing by zero.
max_rev = station_summary['total_revenue'].max()
if max_rev > 0 and not located.empty:
    heatmap_data = [
        [lat, lon, revenue / max_rev]
        for lat, lon, revenue in zip(
            located['latitude'], located['longitude'], located['total_revenue']
        )
    ]
    HeatMap(heatmap_data, radius=20).add_to(m)

# Markers for the top 5 stations by revenue. The ranking is taken among the
# stations that can actually be placed on the map, so a station without
# coordinates does not use up one of the five slots.
for _, row in located.nlargest(5, 'total_revenue').iterrows():
    folium.Marker(
        [row['latitude'], row['longitude']],
        popup=(
            f"{row['name']}\n"
            f"예약률: {row['avg_booking_rate']:.1%}\n"
            f"총 수익: {row['total_revenue']:,.0f} KRW"
        ),
        icon=folium.Icon(color='red', icon='star'),
    ).add_to(m)

# Last expression of the cell: lets the notebook render the map.
m

## 5. 투자 적합 지역 스코어링

In [None]:
# Investment score = booking rate * 0.5 + normalized revenue * 0.5
ss = station_summary.copy()

# Normalize revenue by the maximum across stations. When the maximum is 0 or
# NaN (no revenue recorded, or no rows) the division would produce NaN/inf
# and make every score NaN, so the revenue component falls back to 0 and the
# ranking is driven by booking rate alone.
max_revenue = ss['total_revenue'].max()
if max_revenue > 0:
    ss['revenue_norm'] = ss['total_revenue'] / max_revenue
else:
    ss['revenue_norm'] = 0.0
ss['score'] = ss['avg_booking_rate'] * 0.5 + ss['revenue_norm'] * 0.5

# Ten highest-scoring stations.
top_invest = ss.sort_values('score', ascending=False).head(10)

print('=== 투자 적합 상위 10개 역 ===')
print(
    top_invest[['name', 'district', 'avg_booking_rate', 'avg_price',
                'total_revenue', 'score']]
    .to_string(index=False)
)

# Bar chart of the top-10 scores, coloured by district.
fig = px.bar(
    top_invest, x='name', y='score',
    color='district',
    title='투자 적합도 스코어 상위 10개 역',
    labels={'name': '역명', 'score': '스코어'},
)
fig.show()