In [1]:
import numpy as np
import pandas as pd
from rdkit import Chem
from rdkit.Chem import AllChem, DataStructs
import xgboost as xgb

# Read the training and test tables from the Kaggle input directories.
train = pd.read_csv("/kaggle/input/train-11/train (1).csv")
test = pd.read_csv("/kaggle/input/testingdataset/test.csv")
# --- Fingerprint function (original approach kept; only change: fixed a small typo, np.zero -> np.zeros) ---
def smiles_to_fp(smi, radius=2, nBits=2048):
    """Convert a SMILES string into a Morgan fingerprint bit vector.

    Parameters
    ----------
    smi : str
        SMILES string. Any value that is not a ``str`` (``None``, a pandas
        missing-value ``NaN``, ...) is treated like an unparsable SMILES.
    radius : int, default 2
        Morgan radius passed to RDKit.
    nBits : int, default 2048
        Length of the returned bit vector.

    Returns
    -------
    numpy.ndarray
        1-D ``uint8`` array of shape ``(nBits,)``. All zeros when ``smi`` is
        not a string or when RDKit cannot parse it.
    """
    # A missing SMILES cell arrives as float NaN / None. RDKit's parser raises
    # on non-string input, which would abort the whole feature build, so map
    # such values to the same zero vector used for invalid molecules.
    if not isinstance(smi, str):
        return np.zeros((nBits,), dtype=np.uint8)

    mol = Chem.MolFromSmiles(smi)
    if mol is None:
        # RDKit could not parse the string.
        return np.zeros((nBits,), dtype=np.uint8)

    fp = AllChem.GetMorganFingerprintAsBitVect(mol, radius, nBits=nBits)
    arr = np.zeros((nBits,), dtype=np.uint8)
    # ConvertToNumpyArray writes the bits into `arr` in place.
    DataStructs.ConvertToNumpyArray(fp, arr)
    return arr

# --- Feature matrices: one fingerprint row per SMILES string ---
X = np.vstack([smiles_to_fp(smi) for smi in train["SMILES"]])
X_test = np.vstack([smiles_to_fp(smi) for smi in test["SMILES"]])

# --- Target columns, in the order required for the submission ---
targets = ["Tg", "FFV", "Tc", "Density", "Rg"]

# --- Start the submission table with the test ids ---
submission = pd.DataFrame({"id": test["id"].values})

# Hyperparameters shared by every per-target regressor.
xgb_params = dict(
    n_estimators=500,
    learning_rate=0.05,
    max_depth=8,
    random_state=42,
    n_jobs=-1,
)

# --- Same flow for each target: select labelled rows -> fit -> predict -> add column ---
for tgt in targets:
    # Only rows where this target is present can be used for training.
    mask = train[tgt].notna()
    X_train, y_train = X[mask], train.loc[mask, tgt].values

    model = xgb.XGBRegressor(**xgb_params)
    model.fit(X_train, y_train)

    preds = model.predict(X_test)
    submission[tgt] = preds

# --- Export with a fixed column order: id, Tg, FFV, Tc, Density, Rg ---
submission = submission.loc[:, ["id", *targets]]
# Write the file into the /kaggle/working/ directory.
submission.to_csv("/kaggle/working/submission.csv", index=False)




In [2]:
pip install rdkit


Note: you may need to restart the kernel to use updated packages.
