In [None]:
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler

# Customer-churn baseline: standardise five numeric columns, fit a
# class-weighted logistic regression, write the predictions to a CSV and
# print the accuracy on the training data.

# --- 1. Load the data -------------------------------------------------------
# NOTE(review): both paths point at the same file, so the "test" predictions
# below are made on exactly the rows the model was fitted on. Confirm whether
# a separate test CSV was intended and update TEST_CSV if so.
TRAIN_CSV = "/kaggle/input/houkong-moai/customer_churn.csv"
TEST_CSV = "/kaggle/input/houkong-moai/customer_churn.csv"

train = pd.read_csv(TRAIN_CSV)
test = pd.read_csv(TEST_CSV)

# --- 2. Basic configuration -------------------------------------------------
# Name of the label column (presumably binary 0/1 — verify against the data).
target = "churn"

# Feature columns are listed by hand; nothing is selected automatically.
features = [
    "age",
    "subscription_length",
    "monthly_bill",
    "total_usage",
    "service_complaints",
]

# --- 3. Standardise the features --------------------------------------------
y_train = train[target]

# The scaler is fitted on the training frame only and then applied to both
# frames, so the test features are scaled with the training statistics.
scaler = StandardScaler()
scaler.fit(train[features])
X_train = scaler.transform(train[features])
X_test = scaler.transform(test[features])

# --- 4. Train the model -----------------------------------------------------
model_params = {
    "max_iter": 1000,            # generous iteration cap so the solver can converge
    "class_weight": "balanced",  # reweight classes to counter label imbalance
    "solver": "liblinear",       # solver recommended for small datasets
}
model = LogisticRegression(**model_params)
model.fit(X_train, y_train)

# --- 5. Predict and save the results ----------------------------------------
test_pred = model.predict(X_test)

# Keep the customer identifier next to each prediction in the output file.
submission = pd.DataFrame({
    "ID": test["customer_id"],
    "prediction": test_pred,
})
submission.to_csv("predictions.csv", index=False)

print("预测完成！结果已保存")
# Accuracy on the training data itself — not a held-out estimate.
print(f"训练准确率: {model.score(X_train, y_train):.2%}")
