In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, StratifiedKFold, GridSearchCV
from sklearn.metrics import roc_auc_score, make_scorer
from lightgbm import LGBMClassifier

# Fix the random seed so the split, the CV folds and the model are reproducible
SEED = 42

# Step 1: stratified hold-out split into a training set and a test set
# NOTE(review): X and y are not defined in this cell — they must come from an earlier cell.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.33, stratify=y, random_state=SEED
)

# Step 2: base LightGBM model
lgb_base = LGBMClassifier(
    class_weight='balanced',
    # LightGBM only performs row subsampling when subsample_freq > 0. With the
    # default of 0 the 'subsample' values in the grid below would be silently
    # ignored and half of the grid would be redundant refits.
    subsample_freq=1,
    random_state=SEED,
    n_jobs=-1  # multi-threaded training
    # NOTE(review): GridSearchCV below also uses n_jobs=-1; if CPU
    # oversubscription is observed, consider n_jobs=1 on one of the two.
)

# Step 3: hyper-parameter grid (can be refined further)
param_grid = {
    'n_estimators': [100, 200],
    'max_depth': [3, 5, 7],
    'learning_rate': [0.01, 0.05, 0.1],
    'subsample': [0.8, 1.0],
    'colsample_bytree': [0.8, 1.0]
}

# Step 4: cross-validation splitter and AUC scorer
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=SEED)
# Use the built-in ROC-AUC scorer name. make_scorer(..., needs_proba=True) is
# deprecated in scikit-learn 1.4 and removed in later releases; the string
# scorer computes the same metric on every version.
scorer = 'roc_auc'

# Step 5: exhaustive grid search (refits the best configuration on X_train)
grid_search = GridSearchCV(
    lgb_base,
    param_grid,
    scoring=scorer,
    cv=cv,
    verbose=1,
    n_jobs=-1
)

grid_search.fit(X_train, y_train)

# Step 6: evaluate the best model on the held-out test set
best_model = grid_search.best_estimator_
y_prob_test = best_model.predict_proba(X_test)[:, 1]  # probability of the positive class
auc_test = roc_auc_score(y_test, y_prob_test)

print("✅ Best params:", grid_search.best_params_)
print(f"✅ Test AUC with best LightGBM model: {auc_test:.4f}")


In [None]:
import plotly.graph_objects as go
from scipy.optimize import curve_fit
from sklearn.metrics import r2_score
import numpy as np
import pandas as pd

# Logistic (sigmoid) curve used as the model function for curve fitting
def sigmoid(t, L, k, t0):
    """Evaluate the logistic curve ``L / (1 + exp(-k * (t - t0)))``.

    Parameters
    ----------
    t : scalar or array-like
        Point(s) at which to evaluate the curve.
    L : float
        Upper asymptote (value approached as ``k * (t - t0)`` grows large).
    k : float
        Steepness of the curve.
    t0 : float
        Midpoint; the curve equals ``L / 2`` at ``t == t0``.

    Returns
    -------
    numpy scalar or ndarray
        Curve values, with the same shape as ``t``.
    """
    # 1 / (1 + exp(-x)) == exp(-log(1 + exp(-x))) == exp(-logaddexp(0, -x)).
    # The logaddexp form never overflows, so the optimizer can probe extreme
    # parameter values without emitting RuntimeWarnings from np.exp.
    x = k * (t - t0)
    return L * np.exp(-np.logaddexp(0.0, -x))

# Fit one sigmoid to all pooled scatter points and draw the result with plotly
def fit_and_plot_sigmoid_plotly(df_volume):
    """Fit a sigmoid to market share over time and show it in a plotly figure.

    The distinct values of ``rolling_week`` are sorted and mapped to
    consecutive integers ``t = 0, 1, 2, ...``; ``sigmoid`` is then fitted to
    ``wineb_marketshare`` as a function of ``t`` using every row at once.

    Parameters
    ----------
    df_volume : pandas.DataFrame
        Must contain the columns ``rolling_week`` and ``wineb_marketshare``.
        The frame is copied, so the caller's object is not modified.

    Returns
    -------
    dict or None
        On success a dict with keys ``'L'``, ``'k'``, ``'t0'`` (fitted
        parameters), ``'r2'`` (R² of the fit on the input points) and
        ``'tipping_week'`` (the ``rolling_week`` whose index equals
        ``round(t0)``, or ``"N/A"`` when that index is outside the data).
        ``None`` if the fit fails; the error is printed.
    """
    df = df_volume.copy()
    df = df.sort_values('rolling_week')

    # Step 1: number the rolling weeks to get an ordered integer time axis
    week_to_index = {week: i for i, week in enumerate(sorted(df['rolling_week'].unique()))}
    df['t'] = df['rolling_week'].map(week_to_index)

    # Step 2: fit the sigmoid
    t_all = df['t'].values
    y_all = df['wineb_marketshare'].values

    # Only the fit is guarded: these are the errors curve_fit / the rounding
    # of t0 can raise (no convergence, NaNs or too few points, non-finite t0).
    # Plotting problems are not fit failures and are allowed to surface.
    try:
        popt, _ = curve_fit(sigmoid, t_all, y_all, p0=[1, 1, np.median(t_all)], maxfev=10000)
        y_pred = sigmoid(t_all, *popt)
        r2 = r2_score(y_all, y_pred)
        L, k, t0 = popt
        tipping_index = int(round(t0))
    except (RuntimeError, ValueError, TypeError, OverflowError) as e:
        print(f"❌ 拟合失败: {e}")
        return None

    # Reverse lookup: time index -> rolling_week for the tipping point
    index_to_week = {idx: week for week, idx in week_to_index.items()}
    tipping_week = index_to_week.get(tipping_index, "N/A")

    df['predicted'] = y_pred

    # Step 3: plot
    fig = go.Figure()

    # Observed points
    fig.add_trace(go.Scatter(
        x=df['t'],
        y=df['wineb_marketshare'],
        mode='markers',
        name='Observed Market Share',
        marker=dict(size=5, opacity=0.4)
    ))

    # Fitted curve
    fig.add_trace(go.Scatter(
        x=df['t'],
        y=df['predicted'],
        mode='lines',
        name='Sigmoid Fit',
        line=dict(width=2)
    ))

    # Vertical line at the inflection point; plotly annotation text needs
    # <br> for a line break ("\n" is not rendered as a new line).
    fig.add_vline(
        x=t0,
        line=dict(dash='dash', color='red'),
        annotation_text=f"Tipping Point (t0 ≈ {t0:.1f})<br>Week: {tipping_week}",
        annotation_position="top right"
    )

    fig.update_layout(
        title="Sigmoid Fit to All Market Share Points",
        xaxis_title="Time Index (t)",
        yaxis_title="Market Share",
        height=500,
        width=900
    )

    fig.show()

    return {
        'L': L,
        'k': k,
        't0': t0,
        'r2': r2,
        'tipping_week': tipping_week
    }
