In [8]:
import csv
import os
import rdkit
import pandas as pd
from rdkit import Chem
from rdkit.Chem import Descriptors
from rdkit.Chem import AllChem
from rdkit.ML.Descriptors import MoleculeDescriptors
from padelpy import padeldescriptor

#read smiles of molecules
# One entry per line of the input file; only the trailing newline is removed,
# so blank lines are kept as empty strings and line order is preserved.
# The context manager closes the file even if reading fails part-way.
with open('Put_Prediction_File_Here/dataset.smi') as file:
    smiles = [line.strip('\n') for line in file]

#data for RDKit Des
# Parse every SMILES string. RDKit hands back None for a string it cannot
# parse, so report the offending entries up front instead of letting a later
# RDKit call fail with an opaque argument error.
mols = [Chem.MolFromSmiles(smi) for smi in smiles]
unparsed = [
    f"entry {pos}: {smi!r}"
    for pos, (smi, mol) in enumerate(zip(smiles, mols), start=1)
    if mol is None
]
if unparsed:
    raise ValueError("Could not parse SMILES -> " + "; ".join(unparsed))

# Row labels are the SMILES as re-written by RDKit (not the raw input text).
smiles_list = [Chem.MolToSmiles(mol) for mol in mols]

# Compute every descriptor listed in Descriptors._descList for each molecule.
descs = [desc_name[0] for desc_name in Descriptors._descList]
desc_calc = MoleculeDescriptors.MolecularDescriptorCalculator(descs)
descriptors = pd.DataFrame(
    [desc_calc.CalcDescriptors(mol) for mol in mols],
    columns=descs,
    index=smiles_list,
)

# "index" column: the molecule's position in the input, stored as a string.
index_list = [str(pos) for pos in range(len(mols))]
y = pd.DataFrame({"index": index_list}, index=smiles_list)

# CSV layout: row label (SMILES), "index", then one column per descriptor.
dataset = pd.concat([y, descriptors], axis=1)
dataset.to_csv('Put_Prediction_File_Here/Rdkit_Descriptor.csv')

#data for RdKit FP
# One CSV row per molecule, no header: the input SMILES followed by the 2048
# Morgan (radius 2) fingerprint bits, each written as '0' or '1'.
with open('Put_Prediction_File_Here/FP_Morgan2.csv', 'w', newline='') as f:
    f_csv = csv.writer(f)
    for smi in smiles:
        mol = Chem.MolFromSmiles(smi)
        if mol is None:
            # RDKit returns None for a string it cannot parse; name the culprit.
            raise ValueError(f"Could not parse SMILES {smi!r}")
        fp = AllChem.GetMorganFingerprintAsBitVect(mol, radius=2, nBits=2048)
        # Send the SMILES through the csv writer too, so the whole row is
        # quoted consistently instead of mixing raw writes with csv output.
        f_csv.writerow([smi, *fp.ToBitString()])
        

In [27]:
from joblib import dump, load
import csv
import os
import numpy as np
import lightgbm as lgb
from sklearn import preprocessing


# Re-read the input SMILES, one entry per line with the trailing newline
# removed. The context manager closes the file even if reading fails.
with open('Put_Prediction_File_Here/dataset.smi') as file:
    smiles = [line.strip('\n') for line in file]
# Helpers that read a CSV feature file into a float array and a text file into a list of names.
def _cell_to_float(cell):
    """Convert one CSV cell to float; a cell that is not a number becomes 0.0."""
    try:
        return float(cell)
    except ValueError:
        return 0.0


def read_file_descriptor(file_path,n=1,startline=1):
    """Load the numeric part of a CSV file as a float array.

    Parameters
    ----------
    file_path : str
        Path of the CSV file (read as UTF-8).
    n : int
        Number of leading columns to drop from every row.
    startline : int
        Number of leading rows to skip (e.g. 1 to skip a header row).

    Returns
    -------
    numpy.ndarray
        Float array with one row per kept CSV row. Cells that cannot be
        parsed as a number (including empty cells) are replaced by 0.0.
    """
    rows = []
    with open(file_path, 'r', encoding='utf-8') as f:
        for line_no, row in enumerate(csv.reader(f), start=1):
            if line_no > startline:
                # Convert each cell straight to float. Going through a
                # fixed-width string array would silently truncate values
                # whose float text is longer than the widest original cell.
                rows.append([_cell_to_float(cell) for cell in row[n:]])
    return np.array(rows, dtype=float)
def read_file_name(file_path):
    """Return the lines of a text file as a list of strings.

    Only newline characters at the ends of each line are removed, so blank
    lines come back as empty strings and other whitespace is kept.
    """
    # The context manager closes the file even if reading raises.
    with open(file_path) as file:
        return [line.strip('\n') for line in file]

# RDKit descriptor table: skip the header row and the two leading label columns.
x_des = read_file_descriptor(
    file_path='Put_Prediction_File_Here/Rdkit_Descriptor.csv',
    n=2,
    startline=1,
)
# Morgan fingerprint table: no header row, skip only the leading SMILES column.
x_Morgan2 = read_file_descriptor(
    file_path='Put_Prediction_File_Here/FP_Morgan2.csv',
    n=1,
    startline=0,
)

name = smiles
# Feature matrix: descriptor columns first, fingerprint bits after them.
x2 = np.concatenate((x_des, x_Morgan2), axis=1)


# Load trained model.
def load_model_predict(model_file,x_dataset):
    """Load a saved model from ``model_file`` and return its predictions for ``x_dataset``.

    NOTE(review): ``load`` is joblib's loader, which unpickles the file; only
    point it at model files from a trusted source.
    """
    return load(model_file).predict(x_dataset)

# First layer: every base model predicts from the same feature matrix.
# The order of this tuple sets the column order passed to the second layer.
first_layer_model_files = (
    'wholedataset_Model/lgb_des2.pkl',
    'wholedataset_Model/xgboost_des2.pkl',
    'wholedataset_Model/GBRT_des2.pkl',
    'wholedataset_Model/Lasso_des2.pkl',
    'wholedataset_Model/RF_des2.pkl',
)

x_pre_first_layer = []
print('Predicton Begin')
for model_path in first_layer_model_files:
    x_pre_first_layer.append(load_model_predict(model_path, x2))
print('Prediction Finish')

# Transpose so that each row holds one sample's first-layer predictions.
x_pre_first_layer_T = np.array(list(zip(*x_pre_first_layer)))

# Second layer: combine the first-layer predictions into the final value.
reg_second_layer = load('wholedataset_Model/Lars_secondlayer.pkl')
x_pre_second_layer = reg_second_layer.predict(x_pre_first_layer_T)

# Print one rounded prediction per input molecule, in input order.
for sample_idx in range(len(name)):
    print(round(x_pre_second_layer[sample_idx]))

Predicton Begin
Prediction Finish
272.0
187.0
181.0
206.0
177.0
193.0
200.0
198.0
187.0
204.0
215.0
172.0
181.0
235.0
214.0
204.0
293.0
224.0
222.0
228.0
200.0
235.0
227.0
225.0
223.0
225.0
221.0
223.0
222.0
220.0
218.0
229.0
224.0
230.0
233.0
232.0
227.0
212.0
228.0
219.0
242.0
288.0
286.0
231.0
290.0
295.0
296.0
297.0
298.0
268.0
263.0
261.0
238.0
283.0
260.0
225.0
265.0
260.0
238.0
236.0
237.0
237.0
288.0
264.0
281.0
264.0
261.0
282.0
255.0
298.0
262.0
288.0
254.0
251.0
264.0
296.0
292.0
289.0
294.0
293.0
290.0
301.0
299.0
293.0
311.0
300.0
291.0
286.0
297.0
300.0
305.0
296.0
295.0
289.0
261.0
288.0
265.0
292.0
295.0
302.0
255.0
305.0
265.0
298.0
260.0
296.0
299.0
301.0
247.0
293.0
261.0
305.0
241.0
305.0
300.0
304.0
278.0
289.0
293.0
226.0
263.0
284.0
290.0
225.0
296.0
236.0
288.0
286.0
308.0
283.0
268.0
287.0
312.0
284.0
310.0
292.0
276.0
307.0
296.0
302.0
292.0
309.0
263.0
271.0
289.0
293.0
297.0
300.0
290.0
202.0
224.0
226.0
232.0
229.0
291.0
299.0
304.0
305.0
306.0
267.0
269.0
