# 위기 신호 지표 산출 노트북

EDA 결과를 기반으로 정의한 위기 신호 지표를 계산하고, 가맹점-월 단위의 위험 스코어를 생성합니다.



## 1. 환경 설정 및 데이터 로드
- CSV 데이터 경로와 컬럼 매핑을 정의합니다.
- 분석에 필요한 컬럼 이름을 한글로 정리하고, `기준년월`을 날짜 형식으로 변환합니다.



In [1]:

import pandas as pd
import numpy as np
from pathlib import Path

# Widen pandas' console rendering so wide frames are shown without truncation.
pd.options.display.max_columns = 80
pd.options.display.width = 180

# Merged input file, addressed relative to the current working directory.
DATA_PATH = Path('..', 'data', 'df_merged.csv')

# Raw column code -> Korean analysis name. Columns that are not listed here
# keep whatever name they have in the CSV.
rename_map = {
    # identifiers
    'Unnamed: 0': '레코드ID',
    'ENCODED_MCT': '가맹점ID',
    'TA_YM': '기준년월',
    # bucketed ("<rank>_<label>") columns
    'MCT_OPE_MS_CN': '영업개월수_구간',
    'RC_M1_SAA': '최근1개월_이용금액_구간',
    'RC_M1_TO_UE_CT': '최근1개월_이용건수_구간',
    'RC_M1_UE_CUS_CN': '최근1개월_이용고객수_구간',
    'RC_M1_AV_NP_AT': '최근1개월_평균결제금액_구간',
    'APV_CE_RAT': '승인취소비율_구간',
    # ratio / rank columns
    'DLV_SAA_RAT': '배달이용금액_비율',
    'M1_SME_RY_SAA_RAT': '1개월_동종업종_매출비율',
    'M1_SME_RY_CNT_RAT': '1개월_동종업종_건수비율',
    'M12_SME_RY_SAA_PCE_RT': '12개월_동종업종_매출순위',
    'M12_SME_BZN_SAA_PCE_RT': '12개월_동일상권_매출순위',
    'M12_SME_RY_ME_MCT_RAT': '12개월_동종업종_경쟁강도',
    'M12_SME_BZN_ME_MCT_RAT': '12개월_동일상권_경쟁강도',
    # customer mix columns
    'MCT_UE_CLN_REU_RAT': '재방문고객비율',
    'MCT_UE_CLN_NEW_RAT': '신규고객비율',
    'RC_M1_SHC_RSD_UE_CLN_RAT': 'RC_거주지기준고객비율',
    'RC_M1_SHC_WP_UE_CLN_RAT': 'RC_직장인고객비율',
    'RC_M1_SHC_FLP_UE_CLN_RAT': 'RC_유동고객비율'
}

# Renamed bucket columns, looked up from rename_map so the two definitions
# cannot drift apart. The order of the raw codes fixes the order of the list.
bucket_cols = [
    rename_map[code]
    for code in (
        'MCT_OPE_MS_CN',
        'RC_M1_SAA',
        'RC_M1_TO_UE_CT',
        'RC_M1_UE_CUS_CN',
        'RC_M1_AV_NP_AT',
        'APV_CE_RAT',
    )
]

# Read the merged CSV; 'utf-8-sig' also accepts a leading byte-order mark.
raw_df = pd.read_csv(DATA_PATH, encoding='utf-8-sig')

# Apply the Korean column names and discard the record-id column when present.
df = raw_df.rename(columns=rename_map).drop(columns=['레코드ID'], errors='ignore')

# Parse the reference month; values that fail to parse become NaT and those
# rows are removed.
df['기준년월'] = pd.to_datetime(df['기준년월'], errors='coerce')
df = df.dropna(subset=['기준년월']).copy()

# -999999.9 is treated as a missing-value sentinel in numeric columns: count
# the occurrences for the log line below, then blank them out.
numeric_cols = df.select_dtypes(include=['number']).columns
placeholder = -999999.9
placeholder_count = df[numeric_cols].eq(placeholder).sum().sum()
df[numeric_cols] = df[numeric_cols].replace(placeholder, np.nan)

# Bucket values look like '2_10-25%': the part before the first '_' becomes a
# numeric rank column and the remainder (if any) a label column.
for col in bucket_cols:
    if col not in df.columns:
        continue
    parts = df[col].astype(str).str.split('_', n=1, expand=True)
    df[f'{col}_순위'] = pd.to_numeric(parts[0], errors='coerce')
    if parts.shape[1] > 1:
        df[f'{col}_범주'] = parts[1].replace({'nan': np.nan})

print(f'data shape after cleaning: {df.shape}')
print(f'placeholder replacements: {placeholder_count:,}')
df.head()



data shape after cleaning: (86590, 51)
placeholder replacements: 78,764


Unnamed: 0,가맹점ID,MCT_BSE_AR,MCT_NM,MCT_BRD_NUM,MCT_SIGUNGU_NM,HPSN_MCT_ZCD_NM,HPSN_MCT_BZN_CD_NM,ARE_D,MCT_ME_D,기준년월,영업개월수_구간,최근1개월_이용금액_구간,최근1개월_이용건수_구간,최근1개월_이용고객수_구간,최근1개월_평균결제금액_구간,승인취소비율_구간,배달이용금액_비율,1개월_동종업종_매출비율,1개월_동종업종_건수비율,12개월_동종업종_매출순위,12개월_동일상권_매출순위,12개월_동종업종_경쟁강도,12개월_동일상권_경쟁강도,M12_MAL_1020_RAT,M12_MAL_30_RAT,M12_MAL_40_RAT,M12_MAL_50_RAT,M12_MAL_60_RAT,M12_FME_1020_RAT,M12_FME_30_RAT,M12_FME_40_RAT,M12_FME_50_RAT,M12_FME_60_RAT,재방문고객비율,신규고객비율,RC_거주지기준고객비율,RC_직장인고객비율,RC_유동고객비율,업종_대분류,영업개월수_구간_순위,영업개월수_구간_범주,최근1개월_이용금액_구간_순위,최근1개월_이용금액_구간_범주,최근1개월_이용건수_구간_순위,최근1개월_이용건수_구간_범주,최근1개월_이용고객수_구간_순위,최근1개월_이용고객수_구간_범주,최근1개월_평균결제금액_구간_순위,최근1개월_평균결제금액_구간_범주,승인취소비율_구간_순위,승인취소비율_구간_범주
0,16184E93D9,서울 성동구 마장동,성우**,,서울 성동구,축산물,마장동,20130320,,2024-05-01,2_10-25%,3_25-50%,4_50-75%,4_50-75%,2_10-25%,6_상위6구간(하위1구간),,70.4,33.9,27.2,20.8,12.3,5.6,4.148,9.044,11.084,19.448,24.276,1.952,4.256,5.216,9.152,11.424,18.82,6.47,50.0,7.1,42.9,식료품/소매,2,10-25%,3,25-50%,4,50-75%,4,50-75%,2,10-25%,6.0,상위6구간(하위1구간)
1,16184E93D9,서울 성동구 마장동,성우**,,서울 성동구,축산물,마장동,20130320,,2023-04-01,2_10-25%,4_50-75%,4_50-75%,4_50-75%,2_10-25%,6_상위6구간(하위1구간),,74.6,22.9,11.5,21.3,11.3,6.0,3.9864,11.174,9.4224,16.912,18.9052,2.6136,7.326,6.1776,11.088,12.3948,16.51,5.05,25.0,6.3,68.8,식료품/소매,2,10-25%,4,50-75%,4,50-75%,4,50-75%,2,10-25%,6.0,상위6구간(하위1구간)
2,16184E93D9,서울 성동구 마장동,성우**,,서울 성동구,축산물,마장동,20130320,,2023-08-01,2_10-25%,3_25-50%,4_50-75%,4_50-75%,2_10-25%,1_상위1구간,,85.6,22.1,11.5,20.6,12.0,5.4,3.906,11.214,10.647,17.451,19.782,2.294,6.586,6.253,10.249,11.618,16.98,6.6,17.6,0.0,82.4,식료품/소매,2,10-25%,3,25-50%,4,50-75%,4,50-75%,2,10-25%,1.0,상위1구간
3,16184E93D9,서울 성동구 마장동,성우**,,서울 성동구,축산물,마장동,20130320,,2024-02-01,2_10-25%,3_25-50%,4_50-75%,4_50-75%,1_10%이하,1_상위1구간,,90.5,35.8,27.8,22.2,12.3,5.0,4.7056,10.38,10.38,21.8672,21.8672,2.0944,4.62,4.62,9.7328,9.7328,16.95,9.6,15.8,5.3,78.9,식료품/소매,2,10-25%,3,25-50%,4,50-75%,4,50-75%,1,10%이하,1.0,상위1구간
4,16184E93D9,서울 성동구 마장동,성우**,,서울 성동구,축산물,마장동,20130320,,2024-06-01,2_10-25%,3_25-50%,4_50-75%,4_50-75%,1_10%이하,1_상위1구간,,81.1,27.5,26.9,21.0,12.2,5.6,3.9345,7.869,10.836,18.1245,23.6715,2.1655,4.331,5.964,9.9755,13.0285,20.96,4.79,26.7,0.0,73.3,식료품/소매,2,10-25%,3,25-50%,4,50-75%,4,50-75%,1,10%이하,1.0,상위1구간


## 2. 기본 정렬 및 공통 파생
- 가맹점과 기준년월 기준으로 정렬합니다.
- 분석에 자주 사용하는 순위 및 비율 컬럼을 준비합니다.



In [2]:

# Column used throughout as the same-district sales rank.
rank_col = '12개월_동일상권_매출순위'

# Order rows by merchant and then by month, with a fresh 0..n-1 index, so the
# per-merchant shift/rolling/diff steps that follow see rows in time order.
df_sorted = df.sort_values(by=['가맹점ID', '기준년월'], ignore_index=True)

# Calendar quarter of each reference month.
df_sorted['분기'] = df_sorted['기준년월'].dt.to_period('Q')

# Preview of the columns the indicator cells rely on.
key_cols = [
    '가맹점ID',
    '기준년월',
    rank_col,
    '승인취소비율_구간_순위',
    '재방문고객비율',
    '신규고객비율',
    '배달이용금액_비율',
    '영업개월수_구간_순위',
]
df_sorted[key_cols].head()



Unnamed: 0,가맹점ID,기준년월,12개월_동일상권_매출순위,승인취소비율_구간_순위,재방문고객비율,신규고객비율,배달이용금액_비율,영업개월수_구간_순위
0,000F03E44A,2023-01-01,75.1,1.0,0.0,100.0,,5
1,000F03E44A,2023-02-01,75.2,,0.0,0.0,,5
2,000F03E44A,2023-03-01,75.2,,0.0,0.0,,5
3,000F03E44A,2023-04-01,74.3,1.0,0.0,50.0,,5
4,000F03E44A,2023-05-01,74.5,1.0,0.0,33.33,,5


## 3. 승인 취소 위험 지표
- 최근 1개월 순위에서 직전 3개월 평균 순위(당월 제외, 최소 2개월 값 필요)를 빼서 `승인취소비율_급등지수`를 계산합니다.
- 순위가 5 이상인 월에 대해, 이후 순위가 3 이하로 처음 내려오기까지 걸린 기간(`승인취소비율_정상화월수`)을 산출합니다. 기간은 가맹점별 행(월) 개수로 세며, 이후에 3 이하로 내려온 적이 없으면 결측으로 둡니다.



In [3]:

# Per-merchant mean of the cancel-rate rank over the three rows *before* the
# current one (shift(1) excludes the current row); at least two of those three
# values must be present, otherwise the result is NaN.
rolling_mean = df_sorted.groupby('가맹점ID')['승인취소비율_구간_순위'].transform(
    lambda ranks: ranks.shift(periods=1).rolling(window=3, min_periods=2).mean()
)
df_sorted['승인취소비율_순위_roll3'] = rolling_mean

# Spike index: current rank minus that prior mean (positive = rank went up).
df_sorted['승인취소비율_급등지수'] = df_sorted['승인취소비율_구간_순위'].sub(rolling_mean)

def compute_recovery(group: pd.DataFrame) -> pd.Series:
    """Count rows until the cancel-rate rank next drops to 3 or below.

    For every row of ``group`` whose '승인취소비율_구간_순위' is 5 or higher,
    the result holds the number of rows forward to the first later row whose
    rank is 3 or lower. All other rows (rank below 5, missing rank, or no
    later row at 3 or below) are NaN.

    Args:
        group: Rows of one merchant, already in time order, containing the
            '승인취소비율_구간_순위' column.

    Returns:
        A float Series aligned to ``group.index``.
    """
    ranks = group['승인취소비율_구간_순위'].to_numpy()
    rows_to_normal = np.full(ranks.shape, np.nan, dtype=float)

    # Walk backwards, remembering the nearest later position with rank <= 3.
    # Comparisons against NaN are False, so missing ranks neither trigger a
    # measurement nor count as a recovery point.
    next_normal_pos = None
    for pos in range(len(ranks) - 1, -1, -1):
        rank = ranks[pos]
        if rank >= 5 and next_normal_pos is not None:
            rows_to_normal[pos] = float(next_normal_pos - pos)
        if rank <= 3:
            next_normal_pos = pos

    return pd.Series(rows_to_normal, index=group.index)

# Run compute_recovery once per merchant. Selecting the rank column explicitly
# keeps the grouping key out of the frame handed to apply(), which is what the
# pandas deprecation warning about apply() operating on grouping columns asks
# for; compute_recovery only reads this one column, so the result is unchanged.
# group_keys=False keeps the original row index so the assignment aligns.
recovery = (
    df_sorted
    .groupby('가맹점ID', group_keys=False)[['승인취소비율_구간_순위']]
    .apply(compute_recovery)
)
df_sorted['승인취소비율_정상화월수'] = recovery

df_sorted[['가맹점ID', '기준년월', '승인취소비율_구간_순위', '승인취소비율_급등지수', '승인취소비율_정상화월수']].head(10)



  recovery = df_sorted.groupby('가맹점ID', group_keys=False).apply(compute_recovery)


Unnamed: 0,가맹점ID,기준년월,승인취소비율_구간_순위,승인취소비율_급등지수,승인취소비율_정상화월수
0,000F03E44A,2023-01-01,1.0,,
1,000F03E44A,2023-02-01,,,
2,000F03E44A,2023-03-01,,,
3,000F03E44A,2023-04-01,1.0,,
4,000F03E44A,2023-05-01,1.0,,
5,000F03E44A,2023-06-01,1.0,0.0,
6,000F03E44A,2023-07-01,,,
7,000F03E44A,2023-08-01,,,
8,000F03E44A,2023-09-01,1.0,,
9,000F03E44A,2023-10-01,,,


## 4. 고객 충성도/이탈 지표
- 재방문 및 신규 고객 비율에 대해 기준년월별 백분위 순위를 계산하고, `(백분위 - 1) × 100`으로 변환해 -100에 가까울수록 하위, 0이면 최상위인 음수 점수로 만듭니다.
- 두 점수를 합산한 `충성도_복합점수`와, 신규고객비율의 최근 3개월 평균이 그 직전 3개월 평균 대비 얼마나 변했는지를 나타내는 `고객구조변화지수`를 산출합니다.



In [4]:

# Within each month, turn a ratio into a percentile score in (-100, 0]:
# 0 for the highest value of the month, close to -100 for the lowest.
# method='min' gives tied values the lowest rank of their tie group.
rebuy_score = df_sorted.groupby('기준년월')['재방문고객비율'].transform(
    lambda ratios: ratios.rank(method='min', pct=True).sub(1).mul(100)
)
new_score = df_sorted.groupby('기준년월')['신규고객비율'].transform(
    lambda ratios: ratios.rank(method='min', pct=True).sub(1).mul(100)
)
df_sorted['재방문_분위점수'] = rebuy_score
df_sorted['신규_분위점수'] = new_score

# Composite = sum of both scores; a missing score contributes 0 (no penalty).
df_sorted['충성도_복합점수'] = (
    df_sorted['재방문_분위점수'].fillna(0).add(df_sorted['신규_분위점수'].fillna(0))
)

# NOTE(review): the composite spans roughly -200..0, so a merchant near the
# monthly median on both ratios already sits around -100; a cut-off of -30
# therefore flags much more than the bottom tail - confirm this is intended.
df_sorted['충성도위험_플래그'] = df_sorted['충성도_복합점수'].le(-30)

# Per-merchant mean of the new-customer ratio over the latest three rows and
# over the three rows before those (shift(3)); each window needs at least two
# values, otherwise it is NaN.
rolling_new = df_sorted.groupby('가맹점ID')['신규고객비율'].transform(
    lambda ratios: ratios.rolling(window=3, min_periods=2).mean()
)
rolling_new_prev = df_sorted.groupby('가맹점ID')['신규고객비율'].transform(
    lambda ratios: ratios.shift(periods=3).rolling(window=3, min_periods=2).mean()
)

# Relative change between the two windows; a zero previous mean yields
# +/-inf, which is mapped to NaN.
change_ratio = (
    rolling_new.sub(rolling_new_prev)
    .div(rolling_new_prev)
    .replace([np.inf, -np.inf], np.nan)
)

df_sorted['신규고객비율_3M평균'] = rolling_new
df_sorted['신규고객비율_이전3M평균'] = rolling_new_prev
df_sorted['고객구조변화지수'] = change_ratio

# Alert when the new-customer share fell by 20% or more; NaN compares False.
df_sorted['고객구조변화_경보'] = df_sorted['고객구조변화지수'].le(-0.2)

df_sorted[['가맹점ID', '기준년월', '충성도_복합점수', '충성도위험_플래그', '고객구조변화지수', '고객구조변화_경보']].head(10)



Unnamed: 0,가맹점ID,기준년월,충성도_복합점수,충성도위험_플래그,고객구조변화지수,고객구조변화_경보
0,000F03E44A,2023-01-01,-101.062064,True,,False
1,000F03E44A,2023-02-01,-199.934102,True,,False
2,000F03E44A,2023-03-01,-199.935065,True,,False
3,000F03E44A,2023-04-01,-103.093112,True,,False
4,000F03E44A,2023-05-01,-104.347826,True,-0.444467,True
5,000F03E44A,2023-06-01,-105.539994,True,0.0833,False
6,000F03E44A,2023-07-01,-199.938688,True,0.1666,False
7,000F03E44A,2023-08-01,-199.939686,True,-0.699988,True
8,000F03E44A,2023-09-01,-108.392645,True,-0.815379,True
9,000F03E44A,2023-10-01,-199.94152,True,-0.657123,True


## 5. 매출 순위 및 경쟁 환경 지표
- 3개월 전 대비 동일상권 매출순위 변화를 통해 `순위급락강도점수`를 계산합니다.
- 분기별로 동일상권 경쟁강도와 매출순위의 상관계수를 계산해 `competition_influence` 표의 `경쟁강도_상관` 컬럼으로 정리합니다.



In [5]:

# Change in the same-district sales rank versus three rows earlier for the
# same merchant (NaN for a merchant's first three rows).
rank_diff_3m = df_sorted.groupby('가맹점ID')[rank_col].diff(periods=3)

# Score 2 for a rise of 40 or more, 1 for 20 or more, else 0. Comparisons with
# NaN are False, so rows without a 3-row history score 0.
df_sorted['순위급락강도점수'] = np.where(
    rank_diff_3m >= 40,
    2,
    np.where(rank_diff_3m >= 20, 1, 0),
)

df_sorted['동일상권_순위변화_3M'] = rank_diff_3m

# Per-quarter correlation (Series.corr default, i.e. Pearson) between
# same-district competition intensity and the same-district sales rank, using
# only rows where quarter and both values are present. Quarters whose
# correlation is undefined are dropped.
competition_influence = (
    df_sorted[['분기', '12개월_동일상권_경쟁강도', rank_col]]
    .dropna()
    # Select the two value columns explicitly so apply() no longer receives
    # the grouping column; this removes the pandas deprecation warning about
    # apply() operating on grouping columns without changing the result.
    .groupby('분기')[['12개월_동일상권_경쟁강도', rank_col]]
    .apply(lambda g: g['12개월_동일상권_경쟁강도'].corr(g[rank_col]))
    .dropna()
    .rename('경쟁강도_상관')
    .reset_index()
)

competition_influence.tail()



  df_sorted[['분기', '12개월_동일상권_경쟁강도', rank_col]]


Unnamed: 0,분기,경쟁강도_상관
3,2023Q4,-0.016772
4,2024Q1,0.040831
5,2024Q2,0.065305
6,2024Q3,0.043411
7,2024Q4,0.052355


## 6. 채널 믹스 지표
- 배달 비율의 3개월 변화량과 영업개월수 순위를 활용해 `배달의존도급변점수`를 계산합니다.
- 6개월 이동 표준편차를 6개월 이동 평균으로 나눈 값(변동계수)으로 `배달의존도_안정성점수`를 구합니다. 값이 클수록 배달 비율의 변동이 큽니다.



In [6]:

# Change in the delivery sales share versus three rows earlier, per merchant.
delivery_change = df_sorted.groupby('가맹점ID')['배달이용금액_비율'].diff(periods=3)

# Six-row rolling std and mean of the delivery share (at least three values).
# NOTE: `rolling_mean` rebinds the name used earlier for the cancel-rank
# rolling mean, so re-running earlier cells afterwards needs care.
rolling_std = df_sorted.groupby('가맹점ID')['배달이용금액_비율'].transform(
    lambda ratios: ratios.rolling(window=6, min_periods=3).std()
)
rolling_mean = df_sorted.groupby('가맹점ID')['배달이용금액_비율'].transform(
    lambda ratios: ratios.rolling(window=6, min_periods=3).mean()
)

# std / mean wherever the mean is positive, NaN elsewhere (zero or missing
# mean). Higher values mean a more volatile delivery share.
stability_score = (rolling_std / rolling_mean).where(rolling_mean > 0).to_numpy()

df_sorted['배달비율변화_3M'] = delivery_change

# Only rows whose operating-months rank is 1 or 2 can score: 2 for a rise of
# 30 or more, 1 for 20 or more; everything else (including NaN) is 0.
df_sorted['배달의존도급변점수'] = np.where(
    df_sorted['영업개월수_구간_순위'] <= 2,
    np.where(delivery_change >= 30, 2, np.where(delivery_change >= 20, 1, 0)),
    0,
)

df_sorted['배달의존도_안정성점수'] = stability_score

df_sorted[['가맹점ID', '기준년월', '배달비율변화_3M', '배달의존도급변점수', '배달의존도_안정성점수']].head(10)



Unnamed: 0,가맹점ID,기준년월,배달비율변화_3M,배달의존도급변점수,배달의존도_안정성점수
0,000F03E44A,2023-01-01,,0,
1,000F03E44A,2023-02-01,,0,
2,000F03E44A,2023-03-01,,0,
3,000F03E44A,2023-04-01,,0,
4,000F03E44A,2023-05-01,,0,
5,000F03E44A,2023-06-01,,0,
6,000F03E44A,2023-07-01,,0,
7,000F03E44A,2023-08-01,,0,
8,000F03E44A,2023-09-01,,0,
9,000F03E44A,2023-10-01,,0,


## 7. 복합 위기 스코어 산출
- 각 지표를 0~1 범위로 스케일링한 후 가중치(승인취소 0.3, 충성도 0.3, 매출순위 0.2, 채널 0.2)를 적용해 `위기총점`을 계산합니다.
- 총점을 기준으로 `위기등급`을 구간화합니다.



In [7]:

# Cancel component: rank/10 plus spike/5, limited to [0, 1]. Missing rank or
# spike contributes 0.
cancel_component = (
    df_sorted['승인취소비율_구간_순위'].fillna(0).div(10)
    .add(df_sorted['승인취소비율_급등지수'].fillna(0).div(5))
    .clip(lower=0, upper=1)
)

# Loyalty component: the (non-positive) composite score negated and divided
# by 60, limited to [0, 1].
# NOTE(review): this reaches 1 as soon as the composite is <= -60 - confirm
# the scale against the composite's actual range.
loyalty_component = df_sorted['충성도_복합점수'].fillna(0).mul(-1).div(60).clip(lower=0, upper=1)

# Rank-drop score (0/1/2) mapped onto [0, 1].
rank_component = df_sorted['순위급락강도점수'].div(2).clip(lower=0, upper=1)

# Channel component: equal blend of the delivery spike score (0/1/2 -> 0..1)
# and the delivery volatility ratio (missing -> 0), each limited to [0, 1].
channel_spike = df_sorted['배달의존도급변점수'].div(2).clip(lower=0, upper=1)
channel_stability = df_sorted['배달의존도_안정성점수'].fillna(0).clip(lower=0, upper=1)
channel_component = (0.5 * channel_spike + 0.5 * channel_stability).clip(lower=0, upper=1)

# Relative weight of each component in the total; the four weights sum to 1.
weights = {
    'cancel': 0.3,
    'loyalty': 0.3,
    'rank': 0.2,
    'channel': 0.2
}

# Weighted sum of the four components, accumulated in the order
# cancel -> loyalty -> rank -> channel.
df_sorted['위기총점'] = sum(
    weights[name] * component
    for name, component in (
        ('cancel', cancel_component),
        ('loyalty', loyalty_component),
        ('rank', rank_component),
        ('channel', channel_component),
    )
)

# Grade the total score with left-closed bins: [0, 0.4) -> '관찰',
# [0.4, 0.6) -> '주의', [0.6, inf) -> '위험'.
# The top edge is open-ended on purpose: with right=False a finite top edge of
# 1.0 would exclude a score of exactly 1.0 (all four components saturated),
# leaving the highest-risk rows ungraded (NaN).
df_sorted['위기등급'] = pd.cut(
    df_sorted['위기총점'],
    bins=[0, 0.4, 0.6, np.inf],
    labels=['관찰', '주의', '위험'],
    right=False
)

df_sorted[['가맹점ID', '기준년월', '위기총점', '위기등급']].head(10)



Unnamed: 0,가맹점ID,기준년월,위기총점,위기등급
0,000F03E44A,2023-01-01,0.33,관찰
1,000F03E44A,2023-02-01,0.3,관찰
2,000F03E44A,2023-03-01,0.3,관찰
3,000F03E44A,2023-04-01,0.33,관찰
4,000F03E44A,2023-05-01,0.33,관찰
5,000F03E44A,2023-06-01,0.33,관찰
6,000F03E44A,2023-07-01,0.3,관찰
7,000F03E44A,2023-08-01,0.3,관찰
8,000F03E44A,2023-09-01,0.33,관찰
9,000F03E44A,2023-10-01,0.3,관찰


## 8. 요약 및 저장 예시
- 최근 6개월 위기 등급 분포를 확인합니다.
- 필요하다면 CSV로 내보낼 수 있도록 예시 코드를 제공합니다.



In [8]:

# Rows from the latest reference month and the five months before it
# (six months in total).
recent_mask = df_sorted['기준년월'] >= (df_sorted['기준년월'].max() - pd.DateOffset(months=5))

# Distinct merchants per month and grade, one column per grade.
# '위기등급' is categorical, so `observed` is passed explicitly: observed=False
# keeps the current behaviour (every grade appears as a column even when it is
# empty for a month) and removes the pandas FutureWarning about the changing
# default.
summary = (
    df_sorted[recent_mask]
    .groupby(['기준년월', '위기등급'], observed=False)['가맹점ID']
    .nunique()
    .unstack(fill_value=0)
    .sort_index()
)
summary



  df_sorted[recent_mask]


위기등급,관찰,주의,위험
기준년월,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2024-07-01,2913,663,351
2024-08-01,2957,685,338
2024-09-01,2902,716,412
2024-10-01,3059,693,335
2024-11-01,3064,727,340
2024-12-01,3052,716,387


In [9]:

# Export example (uncomment to write the scored frame to CSV)
# output_path = Path('risk_indicator_snapshot.csv')
# df_sorted.to_csv(output_path, index=False)
# print(f'saved: {output_path.resolve()}')

