In [None]:
import pandas as pd
import numpy as np
from rdkit import Chem
from rdkit.Chem import Descriptors, rdMolDescriptors
from mordred import Calculator, descriptors
import seaborn as sns
import matplotlib.pyplot as plt

# Load the formulation table from sheet "Sheet3" of the Excel workbook,
# using the first row as the column header.
excel_path = r"D:\张雨林\电子科大合作文章\电子科大合作文章\电子科大配方表-离子液体默描述符三步特征筛选.xlsx"
df = pd.read_excel(excel_path, sheet_name="Sheet3", header=0)

# Distinct ionic-liquid labels, taken from the third column (positional index 2).
il_column = df.iloc[:, 2]
ionic_liquids = il_column.unique()

# SMILES lookup tables for the individual ions. The keys are the ion
# abbreviations that appear in the spreadsheet's ionic-liquid labels.

cation_smiles = {
    'EMIM': 'CCN1C=C[N+](C)=C1',
    'BMIM': 'CCCCN1C=C[N+](C)=C1',
    'HMIM': 'CCCCCCN1C=C[N+](C)=C1',
    'Li': '[Li+]',
}

anion_smiles = {
    # Dicyanamide, N(CN)2-. The earlier value 'N#CC#N' is neutral cyanogen:
    # it lacks the central nitrogen and carries no charge.
    'DCA': 'N#C[N-]C#N',
    'BF4': 'F[B-](F)(F)F',
    # Bis(trifluoromethanesulfonyl)imide: two CF3-SO2 groups on a negatively
    # charged nitrogen. The earlier value had a bare F on one sulfur and a
    # neutral N, i.e. a different, uncharged molecule.
    'TFSI': 'FC(F)(F)S(=O)(=O)[N-]S(=O)(=O)C(F)(F)F',
    'ES': 'CCOS(=O)(=O)[O-]',
    # NOTE(review): as written this is dihydrogen phosphate (H2PO4-);
    # confirm that is what the "DPO3" label stands for.
    'DPO3': 'O=P([O-])(O)O',
    'NO3': '[N+](=O)([O-])[O-]',
    'OTF': 'O=S(=O)([O-])C(F)(F)F',
    'Cl': '[Cl-]',
    'ACT': 'CC(=O)[O-]',
    # NOTE(review): this SMILES has four oxygens on sulfur (hydrogen sulfate,
    # HSO4-) although the key reads HSO3 - confirm which anion is intended.
    'HSO3': 'O=S(=O)([O-])O',
}
# Mordred calculator shared by all calculate_descriptors() calls; created on
# first use so the descriptor set is registered only once.
_mordred_calculator = None


def calculate_descriptors(smiles):
    """Compute RDKit and Mordred descriptors for a single SMILES string.

    Args:
        smiles: SMILES string of the molecule or ion.

    Returns:
        A dict mapping descriptor name to value. It holds every entry of
        RDKit's ``Descriptors._descList`` followed by the Mordred results.
        The Mordred dict is unpacked last, so on a name clash the Mordred
        value replaces the RDKit one.

    Raises:
        ValueError: if ``smiles`` is not a non-empty string, or RDKit cannot
            parse it.
    """
    global _mordred_calculator

    if not isinstance(smiles, str) or not smiles.strip():
        raise ValueError(f"SMILES must be a non-empty string, got {smiles!r}")

    mol = Chem.MolFromSmiles(smiles)
    # MolFromSmiles reports a parse failure by returning None instead of
    # raising; fail here with the offending input rather than deep inside a
    # descriptor function.
    if mol is None:
        raise ValueError(f"RDKit could not parse SMILES: {smiles!r}")

    # RDKit descriptors
    rdkit_desc = {name: func(mol) for name, func in Descriptors._descList}

    # Mordred descriptors; fill_missing() replaces values Mordred could not
    # compute with its default fill value.
    if _mordred_calculator is None:
        _mordred_calculator = Calculator(descriptors)
    mordred_desc = _mordred_calculator(mol).fill_missing()

    return {**rdkit_desc, **mordred_desc.asdict()}

# Compute the descriptors once for every known cation and anion.
cation_descriptors = {
    cation: calculate_descriptors(smiles)
    for cation, smiles in cation_smiles.items()
}
anion_descriptors = {
    anion: calculate_descriptors(smiles)
    for anion, smiles in anion_smiles.items()
}

# Merge the two ions' descriptors into one record per ionic liquid. Keys get
# a "cation_" / "anion_" prefix so the two halves cannot overwrite each other.
ionic_liquid_descriptors = {}
for il in ionic_liquids:
    # Labels are expected to read "<cation> <anion>". pandas reads a blank
    # cell as NaN (a float), so check the type before calling .split().
    if not isinstance(il, str):
        raise ValueError(f"Ionic liquid label must be text, got {il!r}")
    parts = il.split()
    if len(parts) != 2:
        raise ValueError(
            f"Ionic liquid label {il!r} must be '<cation> <anion>' "
            f"(two whitespace-separated names), got {len(parts)} part(s)"
        )
    cation, anion = parts
    if cation not in cation_descriptors:
        raise KeyError(
            f"Unknown cation {cation!r} in label {il!r}; "
            f"known cations: {sorted(cation_descriptors)}"
        )
    if anion not in anion_descriptors:
        raise KeyError(
            f"Unknown anion {anion!r} in label {il!r}; "
            f"known anions: {sorted(anion_descriptors)}"
        )
    ionic_liquid_descriptors[il] = {
        **{f"cation_{k}": v for k, v in cation_descriptors[cation].items()},
        **{f"anion_{k}": v for k, v in anion_descriptors[anion].items()},
    }

# Build the feature table: one row per row of df, looked up by the ionic
# liquid label in the third column (positional index 2).
# The rows are collected in a list and turned into a DataFrame in one step;
# concatenating onto a growing DataFrame inside the loop re-copies every
# earlier row on each iteration.
feature_rows = [ionic_liquid_descriptors[label] for label in df.iloc[:, 2]]
feature_df = pd.DataFrame(feature_rows)


# Report the number of feature columns, then the number of descriptors stored
# for the first ionic liquid.
total_feature_count = len(feature_df.columns)
print(f"总特征数量: {total_feature_count}")
first_ionic_liquid = ionic_liquids[0]
il_feature_count = len(ionic_liquid_descriptors[first_ionic_liquid])
print(f"离子液体特征数量: {il_feature_count}")

# Placeholder for feature-distribution plots; no plotting code follows here.


# Write the feature table to CSV without the row index.
output_csv = "material_features.csv"
feature_df.to_csv(output_csv, index=False)



