# In [None]:
# -*- coding: utf-8 -*-
import pandas as pd
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler

# ========== User configuration ========== (edit only this section)
# Name of the target column. This clustering template never reads it; it is
# kept only so the configuration layout stays uniform.
target_col = "churn"
# Names of the columns used as clustering features, in this order.
used_features = [
    "age",
    "subscription_length",
    "monthly_bill",
    "total_usage",
    "service_complaints",
]
# Number of clusters requested from K-Means.
n_clusters = 2
# ==========================================

# 1. Load the data.
# NOTE(review): both frames are read from the same CSV file, so the "test"
# rows clustered later are the same rows the model is fitted on -- confirm
# this is intended, or point `test` at a separate file.
train = pd.read_csv("/kaggle/input/houkong-moai/customer_churn.csv")
test = pd.read_csv("/kaggle/input/houkong-moai/customer_churn.csv")

# 2. 特征工程（严格按用户指定的列名处理）
def prepare_features(df):
    """Return the configured feature columns of ``df``.

    The columns are taken from the module-level ``used_features`` list, in
    that order. If an optional module-level ``useless_cols`` list exists,
    those columns are dropped first; names absent from ``df`` are ignored.

    Args:
        df: Input ``pandas.DataFrame``. It is not modified.

    Returns:
        A DataFrame holding exactly the ``used_features`` columns.

    Raises:
        KeyError: If any name in ``used_features`` is not a column of the
            (possibly reduced) frame.
    """
    # `useless_cols` is not part of the configuration section, so referring
    # to it directly raised NameError on every call. Treat it as optional.
    optional_drops = globals().get('useless_cols', ())
    droppable = [col for col in optional_drops if col in df.columns]
    if droppable:
        df = df.drop(columns=droppable)

    # Fail with an explicit message when a configured feature is unavailable.
    missing = [col for col in used_features if col not in df.columns]
    if missing:
        raise KeyError(f"feature columns not found in data: {missing}")

    # Select strictly the configured feature columns.
    return df[used_features]

# Build the feature matrices for both data sets.
X_train = prepare_features(train)
X_test = prepare_features(test)
# Keep the identifiers for the output file, falling back to the row index
# when there is no ID column. The membership test must name the same column
# that is read: the previous check looked for 'id' while reading
# 'customer_id', so the real ID column was never picked up.
test_ids = test['customer_id'] if 'customer_id' in test.columns else test.index

# 3. Standardise the features.
# The scaler is fitted on the training matrix only; the same fitted scaler is
# then applied to both the training and the test matrix.
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# 4. Fit K-Means on the scaled training matrix (fixed seed: random_state=42).
kmeans = KMeans(n_clusters=n_clusters, random_state=42).fit(X_train_scaled)

# 5. Predict a cluster label for every row of the scaled test matrix.
test_clusters = kmeans.predict(X_test_scaled)

# 6. Build the output table with exactly two columns: 'ID' and 'cluster'.
result = pd.DataFrame(
    dict(
        ID=test_ids.astype(str),  # identifiers are written as strings
        cluster=test_clusters,    # predicted cluster label per row
    )
)

# 7. Save the table as UTF-8 CSV without the index column, then print a short
#    summary followed by the first three rows.
result.to_csv('result.csv', encoding='utf-8', index=False)
print(
    "=" * 40,
    "结果已保存到 result.csv",
    f"特征维度: {X_train_scaled.shape[1]} | 聚类数: {n_clusters}",
    "文件结构示例:",
    result.head(3),
    sep="\n",
)
