In [45]:
# === Dataset source: merged CSV (Jupyter & Streamlit compatible) ===
import altair as alt
import re
from pathlib import Path
import pandas as pd, numpy as np
import streamlit as st

# 1) BASE 경로: .py 파일이면 __file__, 노트북이면 CWD 사용
try:
    BASE = Path(__file__).parent
except NameError:
    BASE = Path.cwd()

# 2) 후보 경로에서 CSV 검색 (존재하는 첫 파일 사용)
CANDIDATES = [
    BASE / "spotify_merged.csv",                          # 권장: 루트에 두기
    BASE / "data" / "raw" / "spotify_merged.csv",
    BASE / "spotify_cleaned_final_v2.csv",                # 백업: 정제 CSV (있다면)
    BASE / "data" / "raw" / "spotify_cleaned_final_v2.csv",
]
CSV_PATH = next((p for p in CANDIDATES if p.exists()), None)
if CSV_PATH is None:
    st.error("❌ merged CSV를 찾지 못했습니다. `spotify_merged.csv`를 프로젝트 루트(또는 data/raw)에 두세요.")
    st.stop()

@st.cache_data(show_spinner=False)
def load_merged_csv(path: Path):
    """원본+매출지표 병합 CSV 로더
    - df_raw: 원본 그대로 (KPI/프리뷰/결측치 표시에 사용)
    - df: 차트용 작업본 (원본 열수에 영향 없이 파생만 추가)
    """
    df_raw = pd.read_csv(path)

    # 작업본 복사
    dfx = df_raw.copy()

    # revenue_num: 문자→숫자 (₩, 콤마 제거)
    if "revenue" in dfx.columns:
        def to_number(x):
            if pd.isna(x): return np.nan
            s = re.sub(r"[^0-9.\-]", "", str(x))
            return float(s) if s else np.nan
        dfx["revenue_num"] = dfx["revenue"].map(to_number)

    # 요금제 표준화 컬럼
    plan_col = next((c for c in ["subscription_plan","spotify_subscription_plan","subscription_plan_norm"]
                     if c in dfx.columns), None)
    if plan_col and "subscription_plan_norm" not in dfx.columns:
        dfx["subscription_plan_norm"] = dfx[plan_col].astype(str)

    # month 문자열화(Altair 정렬 일관성)
    if "month" in dfx.columns:
        dfx["month"] = dfx["month"].astype(str)

    return df_raw, dfx

# 3) 실제 로드 (여기서 반드시 df_raw/df를 생성해야 아래에서 사용 가능)
try:
    df_raw, df = load_merged_csv(CSV_PATH)
except Exception as e:
    st.error(f"데이터 로드 실패: {e}")
    st.stop()

# 4) 디버깅/확인용 캡션 (원하면 지워도 됨)
st.caption(f"✅ CSV 로드: {CSV_PATH.name} — {len(df_raw):,}행 × {len(df_raw.columns)}열 (원본 기준)")

2025-10-25 23:13:50.319 No runtime found, using MemoryCacheStorageManager


DeltaGenerator()

In [46]:
import pandas as pd
import numpy as np

# 1) 데이터 불러오기
df = pd.read_csv("spotify_cleaned_final_v2.csv")

In [47]:
# 1) 데이터 불러오기
df = pd.read_csv("spotify_cleaned_final_v2.csv")

In [48]:
# 2) 프리미엄 여부 파생
df["is_premium"] = df["subscription_plan"].str.lower().str.contains("premium").astype(int)

In [49]:
# 3) 주요 현황 확인
print("✅ 행 수:", len(df))
print("✅ 사용자 수:", df["userid"].nunique())
print("✅ 월 구간:", df["month"].unique())
print("✅ 프리미엄 이용 비율:", round(df["is_premium"].mean()*100, 1), "%")

✅ 행 수: 3120
✅ 사용자 수: 520
✅ 월 구간: ['2023-01' '2023-02' '2023-03' '2023-04' '2023-05' '2023-06']
✅ 프리미엄 이용 비율: 25.2 %


In [50]:
# 4) 취향 관련 컬럼 리스트 저장 (나중에 반복문으로 그룹비교할 때 씀)
pref_cols = [
    "premium_sub_willingness", "preffered_premium_plan", "preferred_listening_content",
    "fav_music_genre", "music_time_slot", "music_Influencial_mood", "music_lis_frequency",
    "music_expl_method", "music_recc_rating", "pod_lis_frequency", "fav_pod_genre",
    "preffered_pod_format", "pod_host_preference", "preffered_pod_duration",
    "pod_variety_satisfaction"
]
print("✅ 취향 변수 수:", len(pref_cols))

✅ 취향 변수 수: 15


In [51]:
# 🎯 Step 2. KPI + 파생변수 계산

# 1) 프리미엄 유지 기간 (6개월 중 몇 번 Premium이었는지)
premium_duration = df.groupby("userid")["is_premium"].sum().reset_index()
premium_duration = premium_duration.rename(columns={"is_premium": "premium_duration"})

In [52]:
# 2) 유저별 총 수익 (LTV)
ltv_user = df.groupby("userid")["revenue"].sum().reset_index().rename(columns={"revenue": "ltv"})

In [53]:
# 3) 유저별 평균 월 매출
ltv_user = ltv_user.merge(premium_duration, on="userid", how="left")
ltv_user["avg_monthly_revenue"] = ltv_user["ltv"] / ltv_user["premium_duration"].replace(0, np.nan)

In [54]:
# 4) Free → Premium 전환 경험 여부
months = sorted(df["month"].unique())
first_m, later_ms = months[0], months[1:]
free_first = set(df[(df["month"] == first_m) & (df["is_premium"] == 0)]["userid"])
prem_later = set(df[(df["month"].isin(later_ms)) & (df["is_premium"] == 1)]["userid"])
ltv_user["is_free_to_premium"] = ltv_user["userid"].isin(free_first & prem_later).astype(int)

In [55]:
# 5) Premium 유지율 (월→다음달)
ret_rows = []
for i in range(len(months)-1):
    a, b = months[i], months[i+1]
    A = set(df[(df["month"]==a) & (df["is_premium"]==1)]["userid"])
    B = set(df[(df["month"]==b) & (df["is_premium"]==1)]["userid"])
    retained = (len(A & B) / len(A)) if A else np.nan
    ret_rows.append({"from_to": f"{a}→{b}", "premium_users": len(A), "premium_retention": retained})
ret_df = pd.DataFrame(ret_rows)

In [56]:
# 6) 월별 ARPU
arpu_monthly = df.groupby("month", as_index=False)["revenue"].mean().rename(columns={"revenue": "arpu"})
arpu_overall = df["revenue"].mean()

In [57]:
# 7) 주요 KPI 정리
kpi = pd.DataFrame({
    "metric": ["conversion_rate", "premium_retention_mean", "arpu_overall", "avg_premium_duration"],
    "value": [
        ltv_user["is_free_to_premium"].mean(),
        ret_df["premium_retention"].mean(),
        arpu_overall,
        ltv_user["premium_duration"].mean()
    ]
})

In [58]:
# 8) 결과 미리보기
print("✅ KPI 요약")
display(kpi)
print("\n✅ 월→다음달 Premium 유지율")
display(ret_df)
print("\n✅ 유저별 LTV 샘플")
display(ltv_user.head())

✅ KPI 요약


Unnamed: 0,metric,value
0,conversion_rate,0.438462
1,premium_retention_mean,0.459863
2,arpu_overall,15115.384615
3,avg_premium_duration,1.511538



✅ 월→다음달 Premium 유지율


Unnamed: 0,from_to,premium_users,premium_retention
0,2023-01→2023-02,126,0.706349
1,2023-02→2023-03,96,0.364583
2,2023-03→2023-04,102,0.343137
3,2023-04→2023-05,142,0.507042
4,2023-05→2023-06,156,0.378205



✅ 유저별 LTV 샘플


Unnamed: 0,userid,ltv,premium_duration,avg_monthly_revenue,is_free_to_premium
0,1,60000,1,60000.0,0
1,2,120000,2,60000.0,0
2,3,240000,4,60000.0,0
3,4,240000,4,60000.0,0
4,5,240000,4,60000.0,0


In [59]:
# 🎯 Step 3. 취향 변수별 그룹 비교
# df : 월별 원본 데이터
# ltv_user : 유저별 LTV, 유지개월, 전환여부 포함

# 1) 유저별 대표 취향값 (최근 월 기준)
rep_pref = (
    df.sort_values(["userid", "month"])
      .groupby("userid")
      .tail(1)[["userid"] + pref_cols]
)


In [60]:
# 2) 유저별 데이터에 취향 결합
ltv_user_pref = ltv_user.merge(rep_pref, on="userid", how="left")


In [61]:
# 3) 각 취향 변수별 비교 요약
pref_summary = []
for c in pref_cols:
    if c not in ltv_user_pref.columns:
        continue

    temp = ltv_user_pref.groupby(c).agg(
        users=("userid", "nunique"),
        avg_ltv=("ltv", "mean"),
        avg_premium_duration=("premium_duration", "mean"),
        avg_monthly_revenue=("avg_monthly_revenue", "mean"),
        free_to_premium_rate=("is_free_to_premium", "mean")
    ).reset_index()

    temp["variable"] = c
    pref_summary.append(temp)

summary_pref = pd.concat(pref_summary, ignore_index=True)

In [62]:
# 4) 결과 저장
summary_pref.to_csv("out_pref_group_summary.csv", index=False)

print("✅ 취향 변수별 비교 요약 (상위 10행)")
display(summary_pref.head(10))
print("\n📁 저장: out_pref_group_summary.csv")

✅ 취향 변수별 비교 요약 (상위 10행)


Unnamed: 0,premium_sub_willingness,users,avg_ltv,avg_premium_duration,avg_monthly_revenue,free_to_premium_rate,variable,preffered_premium_plan,preferred_listening_content,fav_music_genre,...,music_Influencial_mood,music_lis_frequency,music_expl_method,music_recc_rating,pod_lis_frequency,fav_pod_genre,preffered_pod_format,pod_host_preference,preffered_pod_duration,pod_variety_satisfaction
0,No,334,70778.443114,1.179641,60000.0,0.520958,premium_sub_willingness,,,,...,,,,,,,,,,
1,Yes,186,126451.612903,2.107527,60000.0,0.290323,premium_sub_willingness,,,,...,,,,,,,,,,
2,,84,107142.857143,1.785714,60000.0,0.321429,preffered_premium_plan,Duo plan- Rs 149/month,,,...,,,,,,,,,,
3,,39,126153.846154,2.102564,60000.0,0.205128,preffered_premium_plan,Family Plan-Rs 179/month,,,...,,,,,,,,,,
4,,95,131368.421053,2.189474,60000.0,0.410526,preffered_premium_plan,Individual Plan- Rs 119/ month,,,...,,,,,,,,,,
5,,203,70935.960591,1.182266,60000.0,0.55665,preffered_premium_plan,Not interested,,,...,,,,,,,,,,
6,,5,108000.0,1.8,60000.0,0.4,preffered_premium_plan,Not specified,,,...,,,,,,,,,,
7,,94,61914.893617,1.031915,60000.0,0.414894,preffered_premium_plan,Student Plan-Rs 59/month,,,...,,,,,,,,,,
8,,410,82829.268293,1.380488,60000.0,0.460976,preferred_listening_content,,Music,,...,,,,,,,,,,
9,,110,120000.0,2.0,60000.0,0.354545,preferred_listening_content,,Podcast,,...,,,,,,,,,,



📁 저장: out_pref_group_summary.csv


In [63]:
# 🎯 Step 4. 통계 검정
from scipy import stats
import numpy as np
import pandas as pd

tests = []

In [64]:
# 1️⃣ 전환율(is_free_to_premium) 차이: 카이제곱 검정
for c in pref_cols:
    if c not in ltv_user_pref.columns:
        continue
    # 교차표 생성
    ct = pd.crosstab(ltv_user_pref[c], ltv_user_pref["is_free_to_premium"])
    # 2개 이상 그룹, 2개 이상 카테고리일 때만
    if ct.shape[0] > 1 and ct.shape[1] > 1:
        chi2, p, dof, _ = stats.chi2_contingency(ct)
        tests.append({
            "feature": c,
            "test_type": "chi2 (conversion)",
            "p_value": p,
            "note": "p<0.05 → 전환율 차이가 유의미함"
        })


In [65]:
# 2️⃣ LTV 평균 차이: 일원분산분석 (ANOVA)
for c in pref_cols:
    if c not in ltv_user_pref.columns:
        continue
    groups = [g["ltv"].dropna().values for _, g in ltv_user_pref.groupby(c)]
    if len(groups) >= 2 and all(len(g) > 1 for g in groups):
        F, p = stats.f_oneway(*groups)
        tests.append({
            "feature": c,
            "test_type": "ANOVA (LTV)",
            "p_value": p,
            "note": "p<0.05 → LTV 평균 차이가 유의미함"
        })

In [66]:
# 3️⃣ 결과 정리
tests_df = pd.DataFrame(tests).sort_values("p_value")
tests_df.to_csv("out_pref_significance_tests.csv", index=False)

print("✅ 취향 변수별 유의성 검정 결과 (상위 10)")
display(tests_df.head(10))
print("\n📁 저장: out_pref_significance_tests.csv")

✅ 취향 변수별 유의성 검정 결과 (상위 10)


Unnamed: 0,feature,test_type,p_value,note
15,premium_sub_willingness,ANOVA (LTV),8.885363e-11,p<0.05 → LTV 평균 차이가 유의미함
23,preffered_pod_duration,ANOVA (LTV),1.26355e-08,p<0.05 → LTV 평균 차이가 유의미함
16,preffered_premium_plan,ANOVA (LTV),2.399703e-08,p<0.05 → LTV 평균 차이가 유의미함
0,premium_sub_willingness,chi2 (conversion),6.09386e-07,p<0.05 → 전환율 차이가 유의미함
19,music_recc_rating,ANOVA (LTV),4.128908e-05,p<0.05 → LTV 평균 차이가 유의미함
1,preffered_premium_plan,chi2 (conversion),0.0001188697,p<0.05 → 전환율 차이가 유의미함
17,preferred_listening_content,ANOVA (LTV),0.0002742748,p<0.05 → LTV 평균 차이가 유의미함
22,pod_host_preference,ANOVA (LTV),0.005180075,p<0.05 → LTV 평균 차이가 유의미함
18,music_time_slot,ANOVA (LTV),0.01047667,p<0.05 → LTV 평균 차이가 유의미함
20,pod_lis_frequency,ANOVA (LTV),0.02212103,p<0.05 → LTV 평균 차이가 유의미함



📁 저장: out_pref_significance_tests.csv


In [67]:
# 스텝1~4 산출물 점검 (없으면 에러 메시지로 알려줌)
import pandas as pd

need_vars = ["df", "ltv_user", "pref_cols"]
missing = [v for v in need_vars if v not in globals()]
assert not missing, f"다음 변수가 없습니다: {missing}. 이전 셀을 다시 실행해 주세요."

print("✅ df shape:", df.shape)
print("✅ 고유 사용자수:", df["userid"].nunique() if "userid" in df.columns else "userid 없음")
print("✅ ltv_user shape:", ltv_user.shape)
print("✅ 취향 변수 개수:", len(pref_cols), "→", pref_cols[:8], ("..." if len(pref_cols)>8 else ""))

# 스텝2 산출물(있으면) 확인
if "kpi" in globals(): 
    print("\n[KPI]"); display(kpi.head())
if "ret_df" in globals():
    print("\n[월→다음달 Premium 유지율]"); display(ret_df.head())
if "arpu_monthly" in globals():
    print("\n[월별 ARPU]"); display(arpu_monthly.head())

# 스텝3 요약
if "summary_pref" in globals():
    print("\n[취향 변수별 요약]"); display(summary_pref.head())

# 스텝4 검정 결과
if "tests_df" in globals():
    print("\n[유의성 검정 결과]"); display(tests_df.head())

✅ df shape: (3120, 26)
✅ 고유 사용자수: 520
✅ ltv_user shape: (520, 5)
✅ 취향 변수 개수: 15 → ['premium_sub_willingness', 'preffered_premium_plan', 'preferred_listening_content', 'fav_music_genre', 'music_time_slot', 'music_Influencial_mood', 'music_lis_frequency', 'music_expl_method'] ...

[KPI]


Unnamed: 0,metric,value
0,conversion_rate,0.438462
1,premium_retention_mean,0.459863
2,arpu_overall,15115.384615
3,avg_premium_duration,1.511538



[월→다음달 Premium 유지율]


Unnamed: 0,from_to,premium_users,premium_retention
0,2023-01→2023-02,126,0.706349
1,2023-02→2023-03,96,0.364583
2,2023-03→2023-04,102,0.343137
3,2023-04→2023-05,142,0.507042
4,2023-05→2023-06,156,0.378205



[월별 ARPU]


Unnamed: 0,month,arpu
0,2023-01,14538.461538
1,2023-02,11076.923077
2,2023-03,11769.230769
3,2023-04,16384.615385
4,2023-05,18000.0



[취향 변수별 요약]


Unnamed: 0,premium_sub_willingness,users,avg_ltv,avg_premium_duration,avg_monthly_revenue,free_to_premium_rate,variable,preffered_premium_plan,preferred_listening_content,fav_music_genre,...,music_Influencial_mood,music_lis_frequency,music_expl_method,music_recc_rating,pod_lis_frequency,fav_pod_genre,preffered_pod_format,pod_host_preference,preffered_pod_duration,pod_variety_satisfaction
0,No,334,70778.443114,1.179641,60000.0,0.520958,premium_sub_willingness,,,,...,,,,,,,,,,
1,Yes,186,126451.612903,2.107527,60000.0,0.290323,premium_sub_willingness,,,,...,,,,,,,,,,
2,,84,107142.857143,1.785714,60000.0,0.321429,preffered_premium_plan,Duo plan- Rs 149/month,,,...,,,,,,,,,,
3,,39,126153.846154,2.102564,60000.0,0.205128,preffered_premium_plan,Family Plan-Rs 179/month,,,...,,,,,,,,,,
4,,95,131368.421053,2.189474,60000.0,0.410526,preffered_premium_plan,Individual Plan- Rs 119/ month,,,...,,,,,,,,,,



[유의성 검정 결과]


Unnamed: 0,feature,test_type,p_value,note
15,premium_sub_willingness,ANOVA (LTV),8.885363e-11,p<0.05 → LTV 평균 차이가 유의미함
23,preffered_pod_duration,ANOVA (LTV),1.26355e-08,p<0.05 → LTV 평균 차이가 유의미함
16,preffered_premium_plan,ANOVA (LTV),2.399703e-08,p<0.05 → LTV 평균 차이가 유의미함
0,premium_sub_willingness,chi2 (conversion),6.09386e-07,p<0.05 → 전환율 차이가 유의미함
19,music_recc_rating,ANOVA (LTV),4.128908e-05,p<0.05 → LTV 평균 차이가 유의미함


In [71]:
# ===== STEP5-A. Revenue Feature Importance (안전 버전) =====
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
import numpy as np
import pandas as pd

# 1) 후보 변수 설정 (있는 것만 자동 선택)
num_candidates = ["premium_sub_willingness", "music_recc_rating", "is_premium"]
cat_candidates = [
    "preferred_listening_content","fav_music_genre","music_time_slot",
    "music_Influencial_mood","music_lis_frequency","music_expl_method",
    "pod_lis_frequency","fav_pod_genre","preffered_pod_format",
    "pod_host_preference","preffered_pod_duration","pod_variety_satisfaction",
    "gender","subscription_plan","spotify_listening_device"
]

num_features = [c for c in num_candidates if c in df.columns]
cat_features = [c for c in cat_candidates if c in df.columns]

# feature가 하나도 없으면 종료
assert len(num_features + cat_features) > 0, "Revenue 예측용 feature가 없습니다. 후보 변수 존재 여부를 확인하세요."

# 2) X, y 구성
X = pd.DataFrame(index=df.index)
# 수치형: 강제로 numeric 변환
for c in num_features:
    X[c] = pd.to_numeric(df[c], errors="coerce")

# 범주형: 문자열 변환 → 결측 'NA' → 원핫
for c in cat_features:
    X[c] = df[c].astype(str).fillna("NA")
X = pd.get_dummies(X, columns=cat_features, drop_first=True)

# 결측 처리
X = X.replace([np.inf, -np.inf], np.nan).fillna(0)

y = pd.to_numeric(df["revenue"], errors="coerce").fillna(0)

# 표본/특징 최소 검사
assert X.shape[0] >= 20 and X.shape[1] >= 1, f"샘플/특징 부족: X={X.shape}, y={y.shape}"

# 3) 학습/검증
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=max(0.2, min(0.2, 0.2)), random_state=42)
rf_revenue = RandomForestRegressor(n_estimators=300, random_state=42)
rf_revenue.fit(X_train, y_train)

# 4) 중요도/성능 출력
imp_revenue = pd.Series(rf_revenue.feature_importances_, index=X.columns).sort_values(ascending=False)
r2_train = rf_revenue.score(X_train, y_train)
r2_test  = rf_revenue.score(X_test,  y_test)

print(f"✅ Revenue RF 학습 완료 | R² train={r2_train:.3f}, test={r2_test:.3f}")
display(imp_revenue.head(15))

✅ Revenue RF 학습 완료 | R² train=1.000, test=1.000


is_premium                                           0.5
subscription_plan_Premium                            0.5
premium_sub_willingness                              0.0
fav_pod_genre_Everything                             0.0
pod_lis_frequency_Rarely                             0.0
pod_lis_frequency_Several times a week               0.0
fav_pod_genre_Comedy                                 0.0
fav_pod_genre_Dance and Relevant cases               0.0
fav_pod_genre_Educational                            0.0
fav_pod_genre_Finance related and current affairs    0.0
pod_lis_frequency_Never                              0.0
fav_pod_genre_Food and cooking                       0.0
fav_pod_genre_General knowledge                      0.0
fav_pod_genre_Health and Fitness                     0.0
fav_pod_genre_Informative stuff                      0.0
dtype: float64

In [72]:
# ===== STEP5-B. LTV Feature Importance (유저 단위, 안전 버전) =====
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split

# 1) 유저 대표 취향(최근 월) 묶기
rep_cols = list(set(num_candidates + cat_candidates))
rep_cols = [c for c in rep_cols if c in df.columns]
rep_pref = (
    df.sort_values(["userid","month"])
      .groupby("userid")
      .tail(1)[["userid"] + rep_cols]
)

# 2) LTV 데이터(ltv_user)와 병합
ltv_data = ltv_user.merge(rep_pref, on="userid", how="left")

# 3) X2, y2 구성
X2 = pd.DataFrame(index=ltv_data.index)
for c in [c for c in num_candidates if c in ltv_data.columns]:
    X2[c] = pd.to_numeric(ltv_data[c], errors="coerce")

cats2 = [c for c in cat_candidates if c in ltv_data.columns]
for c in cats2:
    X2[c] = ltv_data[c].astype(str).fillna("NA")
X2 = pd.get_dummies(X2, columns=cats2, drop_first=True)
X2 = X2.replace([np.inf, -np.inf], np.nan).fillna(0)

y2 = pd.to_numeric(ltv_data["ltv"], errors="coerce").fillna(0)

# 최소 표본 검사
assert X2.shape[0] >= 20 and X2.shape[1] >= 1, f"LTV 표본/특징 부족: X2={X2.shape}, y2={y2.shape}"

# 4) 학습/검증
X2_train, X2_test, y2_train, y2_test = train_test_split(X2, y2, test_size=0.2, random_state=42)
rf_ltv = RandomForestRegressor(n_estimators=300, random_state=42)
rf_ltv.fit(X2_train, y2_train)

# 5) 중요도/성능
imp_ltv = pd.Series(rf_ltv.feature_importances_, index=X2.columns).sort_values(ascending=False)
r2_train2 = rf_ltv.score(X2_train, y2_train)
r2_test2  = rf_ltv.score(X2_test,  y2_test)

print(f"✅ LTV RF 학습 완료 | R² train={r2_train2:.3f}, test={r2_test2:.3f}")
display(imp_ltv.head(15))

✅ LTV RF 학습 완료 | R² train=0.883, test=0.155


is_premium                                                                                     0.110912
subscription_plan_Premium                                                                      0.100143
music_recc_rating                                                                              0.055372
music_Influencial_mood_Sadness or melancholy                                                   0.036534
fav_pod_genre_Sports                                                                           0.026846
preffered_pod_duration_Longer                                                                  0.026173
fav_music_genre_Rap                                                                            0.022057
spotify_listening_device_Smartphone, Computer or laptop, Smart speakers or voice assistants    0.020328
pod_host_preference_Well known individuals                                                     0.017183
spotify_listening_device_Smartphone                             

In [73]:
import os
out_dir = "data"   # 앱에서 찾는 폴더
os.makedirs(out_dir, exist_ok=True)

# 이미 계산된 변수들: kpi, ret_df, arpu_monthly, summary_pref, tests_df, imp_ltv
kpi.to_csv(f"{out_dir}/out_revenue_kpis.csv", index=False)
ret_df.to_csv(f"{out_dir}/out_premium_retention_monthly.csv", index=False)
arpu_monthly.to_csv(f"{out_dir}/out_arpu_monthly.csv", index=False)

# (이미 있는 것들도 같은 폴더로 저장해두면 깔끔)
summary_pref.to_csv(f"{out_dir}/out_pref_group_summary.csv", index=False)
tests_df.to_csv(f"{out_dir}/out_pref_significance_tests.csv", index=False)
imp_ltv.to_csv(f"{out_dir}/out_feature_importance_ltv.csv", index=False)

print("✅ Export 완료 → /data 폴더:")
print("- out_revenue_kpis.csv")
print("- out_premium_retention_monthly.csv")
print("- out_arpu_monthly.csv")
print("- out_pref_group_summary.csv")
print("- out_pref_significance_tests.csv")
print("- out_feature_importance_ltv.csv")

✅ Export 완료 → /data 폴더:
- out_revenue_kpis.csv
- out_premium_retention_monthly.csv
- out_arpu_monthly.csv
- out_pref_group_summary.csv
- out_pref_significance_tests.csv
- out_feature_importance_ltv.csv
