# Performance of SigmaCCS on the test set with different coordinates generated by ETKDG and MMFF94

In [1]:
import sys
sys.path.append("..")
from sigma.GraphData import *
from sigma.model import *
import pandas as pd
import random
from pandas import Series,DataFrame
import numpy as np
from tqdm import tqdm
import statistics

In [2]:
# Inputs for the internal test-set experiment (paths are relative to this notebook).
ifile = "../data/TestData.csv"                   # CSV handed to read_data()
ParameterPath = "../parameter/parameter.pkl"     # pickled parameter object
mfileh5 = "../model/model_new.h5"                # model file handed to load_Model_from_file()

In [3]:
# Draw 30 distinct integers from 1..99 (sampling without replacement); each one
# is later used as the random seed of one coordinate-generation run.
res = random.sample(range(1, 100), k=30)
def Generating_coordinates_different_seeds(smiles, adduct, ccs, a, All_Atoms, ps = None):
    '''
    * Using ETKDG to generate 3D coordinates of molecules with a given random seed,
    * followed by an MMFF94 optimization of the embedded conformer
    *
    * Attributes
    * ----------
    * smiles    : The SMILES strings of the molecules
    * adduct    : Adducts of the molecules (indexed by the position of the molecule in smiles)
    * ccs       : CCS of the molecules (indexed by the position of the molecule in smiles)
    * a         : Random seed assigned to the embedding parameters
    * All_Atoms : Element set (The type of element provided must cover all elements contained in the molecule)
    * ps        : Embedding parameter object. When None (default) a fresh AllChem.ETKDGv3()
    *             is created for this call. A caller-supplied object is modified in place
    *             (randomSeed, maxAttempts, numThreads, useRandomCoords).
    *
    * Returns
    * -------
    * succ_smiles : SMILES of the molecules for which a 3D conformation was generated
    * succ_adduct : Adducts of the molecules for which a 3D conformation was generated
    * succ_ccs    : CCS of the molecules for which a 3D conformation was generated
    * Coordinate  : 3D coordinates of those molecules (one [x, y, z] list per atom, hydrogens included)
    *
    * Molecules are skipped when the SMILES cannot be parsed, when they have fewer than
    * two heavy atoms, when they contain an element outside All_Atoms, or when the
    * embedding returns no conformer.
    '''
    # Build the parameter object per call: a default evaluated in the signature
    # would be created once at definition time and shared (and mutated) by every call.
    if ps is None:
        ps = AllChem.ETKDGv3()
    # These settings are identical for every molecule, so set them once up front.
    ps.randomSeed = a
    ps.maxAttempts = 1
    ps.numThreads = 0
    ps.useRandomCoords = True

    succ_smiles = []
    succ_adduct = []
    succ_ccs    = []
    Coordinate  = []

    for index, smi in enumerate(smiles):
        try:
            iMol = Chem.MolFromSmiles(smi)
            if iMol is None:
                # Unparseable SMILES: nothing to embed.
                continue
            iMol = Chem.RemoveHs(iMol)
        except Exception:
            # Best effort: a molecule that cannot be built is skipped, but
            # KeyboardInterrupt / SystemExit are no longer swallowed.
            continue

        atoms = [atom.GetSymbol() for atom in iMol.GetAtoms()]
        # At least two heavy atoms are required
        if len(atoms) < 2:
            continue
        # Every element of the molecule must be contained in All_Atoms
        if any(symbol not in All_Atoms for symbol in atoms):
            continue

        # Adding H to a molecular object
        iMol3D = Chem.AddHs(iMol)

        # Generating a single 3D conformation of the molecule
        conf_ids = AllChem.EmbedMultipleConfs(iMol3D, numConfs = 1, params = ps)
        # Whether the conformation is successful or not
        if len(conf_ids) == 0:
            print('conformation is error')
            continue

        # MMFF94 optimization; its return value is not used
        AllChem.MMFFOptimizeMoleculeConfs(iMol3D, numThreads = 0)

        conformer = iMol3D.GetConformer()
        Coordinate.append([list(conformer.GetAtomPosition(atom.GetIdx()))
                           for atom in iMol3D.GetAtoms()])

        succ_smiles.append(smi)
        succ_adduct.append(adduct[index])
        succ_ccs.append(ccs[index])

    return succ_smiles, succ_adduct, succ_ccs, Coordinate

In [4]:
# Evaluate the model on the test set once per ETKDG random seed in `res` and
# collect the metrics of every run in R2 / MeRE / randomSeeds.
R2 = []
MeRE = []
randomSeeds = []

# The raw data, the parameter object and the trained model do not depend on
# the seed, so they are loaded once instead of once per repetition.
all_smiles, all_adduct, all_ccs = read_data(ifile)
# NOTE(review): unpickling can execute arbitrary code; only load a trusted parameter file.
with open(ParameterPath, 'rb') as file:
    param = pickle.load(file)
ECC_Model = load_Model_from_file(mfileh5)
coor_range = param.Max_Coor - param.Min_Coor

for a in res:
    # Only molecules for which a conformer could be generated are returned.
    smiles, adduct, ccs, Coordinate = Generating_coordinates_different_seeds(
        all_smiles, all_adduct, all_ccs, a, param.All_Atoms)

    # Min-max scale the coordinates with the bounds stored in the parameter object.
    Coordinate = [(np.array(xyz) - param.Min_Coor) / coor_range for xyz in Coordinate]

    adj, features, edge_features = convertToGraph(smiles, Coordinate, param.All_Atoms)
    DataSet = MyDataset(features, adj, edge_features, ccs)

    predictions = predict(ECC_Model, param.adduct_SET, DataSet, adduct)
    print("The number of molecules in the test set :", len(predictions), '\n')

    # SigmaPer[0] is reported as the R2 score, SigmaPer[1] as the median relative error (%).
    SigmaPer = Metrics(ccs, predictions)
    print("randomSeed :", a)
    print("R2 Score :", SigmaPer[0])
    print("Median Relative Error :", SigmaPer[1], '%')

    R2.append(SigmaPer[0])
    MeRE.append(SigmaPer[1])
    randomSeeds.append(a)

  features = np.asarray(features)
  edge_features = np.asarray(edge_features)
  f"The initializer {self.__class__.__name__} is unseeded "


Predictions: 100%|█████████████████████████████████████▉| 558/559 [00:27<00:00, 20.37it/s]
The number of molecules in the test set : 559 

randomSeed : 45
R2 Score : 0.9937630410609587
Median Relative Error : 1.2043710031130037 %


  features = np.asarray(features)
  edge_features = np.asarray(edge_features)
  f"The initializer {self.__class__.__name__} is unseeded "


Predictions: 100%|█████████████████████████████████████▉| 558/559 [00:26<00:00, 20.76it/s]
The number of molecules in the test set : 559 

randomSeed : 34
R2 Score : 0.9938415869677824
Median Relative Error : 1.2026298794199095 %


  features = np.asarray(features)
  edge_features = np.asarray(edge_features)
  f"The initializer {self.__class__.__name__} is unseeded "


Predictions: 100%|█████████████████████████████████████▉| 558/559 [00:17<00:00, 31.85it/s]
The number of molecules in the test set : 559 

randomSeed : 43
R2 Score : 0.9937822358466177
Median Relative Error : 1.2783695420865757 %


  features = np.asarray(features)
  edge_features = np.asarray(edge_features)
  f"The initializer {self.__class__.__name__} is unseeded "


Predictions: 100%|█████████████████████████████████████▉| 558/559 [00:24<00:00, 23.18it/s]
The number of molecules in the test set : 559 

randomSeed : 80
R2 Score : 0.9936336531518316
Median Relative Error : 1.231228652878192 %


  features = np.asarray(features)
  edge_features = np.asarray(edge_features)
  f"The initializer {self.__class__.__name__} is unseeded "


Predictions: 100%|█████████████████████████████████████▉| 558/559 [00:23<00:00, 23.75it/s]
The number of molecules in the test set : 559 

randomSeed : 68
R2 Score : 0.9936368929409544
Median Relative Error : 1.2201551784600997 %


  features = np.asarray(features)
  edge_features = np.asarray(edge_features)
  f"The initializer {self.__class__.__name__} is unseeded "


Predictions: 100%|█████████████████████████████████████▉| 558/559 [00:23<00:00, 23.44it/s]
The number of molecules in the test set : 559 

randomSeed : 92
R2 Score : 0.9938535524303723
Median Relative Error : 1.18266739425306 %


  features = np.asarray(features)
  edge_features = np.asarray(edge_features)
  f"The initializer {self.__class__.__name__} is unseeded "


Predictions: 100%|█████████████████████████████████████▉| 558/559 [00:22<00:00, 25.33it/s]
The number of molecules in the test set : 559 

randomSeed : 88
R2 Score : 0.9937978154275091
Median Relative Error : 1.2124823867651036 %


  features = np.asarray(features)
  edge_features = np.asarray(edge_features)
  f"The initializer {self.__class__.__name__} is unseeded "


Predictions: 100%|█████████████████████████████████████▉| 558/559 [00:23<00:00, 23.68it/s]
The number of molecules in the test set : 559 

randomSeed : 41
R2 Score : 0.9937839445751436
Median Relative Error : 1.2473667834330997 %


  features = np.asarray(features)
  edge_features = np.asarray(edge_features)
  f"The initializer {self.__class__.__name__} is unseeded "


Predictions: 100%|█████████████████████████████████████▉| 558/559 [00:23<00:00, 23.98it/s]
The number of molecules in the test set : 559 

randomSeed : 86
R2 Score : 0.9936868037264808
Median Relative Error : 1.2224954272073412 %


  features = np.asarray(features)
  edge_features = np.asarray(edge_features)
  f"The initializer {self.__class__.__name__} is unseeded "


Predictions: 100%|█████████████████████████████████████▉| 558/559 [00:24<00:00, 23.00it/s]
The number of molecules in the test set : 559 

randomSeed : 98
R2 Score : 0.9937185596444922
Median Relative Error : 1.2547213135645647 %


  features = np.asarray(features)
  edge_features = np.asarray(edge_features)
  f"The initializer {self.__class__.__name__} is unseeded "


Predictions: 100%|█████████████████████████████████████▉| 558/559 [00:17<00:00, 32.23it/s]
The number of molecules in the test set : 559 

randomSeed : 87
R2 Score : 0.9936724794038739
Median Relative Error : 1.2183028382140344 %


  features = np.asarray(features)
  edge_features = np.asarray(edge_features)
  f"The initializer {self.__class__.__name__} is unseeded "


Predictions: 100%|█████████████████████████████████████▉| 558/559 [00:25<00:00, 22.20it/s]
The number of molecules in the test set : 559 

randomSeed : 82
R2 Score : 0.9937338418070045
Median Relative Error : 1.2724800411095738 %


  features = np.asarray(features)
  edge_features = np.asarray(edge_features)
  f"The initializer {self.__class__.__name__} is unseeded "


Predictions: 100%|█████████████████████████████████████▉| 558/559 [00:24<00:00, 23.13it/s]
The number of molecules in the test set : 559 

randomSeed : 21
R2 Score : 0.993776196783835
Median Relative Error : 1.2548317472298371 %


  features = np.asarray(features)
  edge_features = np.asarray(edge_features)
  f"The initializer {self.__class__.__name__} is unseeded "


Predictions: 100%|█████████████████████████████████████▉| 558/559 [00:23<00:00, 23.43it/s]
The number of molecules in the test set : 559 

randomSeed : 83
R2 Score : 0.993827077655933
Median Relative Error : 1.2295660324829787 %


  features = np.asarray(features)
  edge_features = np.asarray(edge_features)
  f"The initializer {self.__class__.__name__} is unseeded "


Predictions: 100%|█████████████████████████████████████▉| 558/559 [00:17<00:00, 31.49it/s]
The number of molecules in the test set : 559 

randomSeed : 31
R2 Score : 0.9939187213489945
Median Relative Error : 1.206380066555967 %


  features = np.asarray(features)
  edge_features = np.asarray(edge_features)
  f"The initializer {self.__class__.__name__} is unseeded "


Predictions: 100%|█████████████████████████████████████▉| 558/559 [00:26<00:00, 20.76it/s]
The number of molecules in the test set : 559 

randomSeed : 66
R2 Score : 0.9937850018976171
Median Relative Error : 1.2385847406944615 %


  features = np.asarray(features)
  edge_features = np.asarray(edge_features)
  f"The initializer {self.__class__.__name__} is unseeded "


Predictions: 100%|█████████████████████████████████████▉| 558/559 [00:25<00:00, 22.29it/s]
The number of molecules in the test set : 559 

randomSeed : 65
R2 Score : 0.9937679194406991
Median Relative Error : 1.239196193275269 %


  features = np.asarray(features)
  edge_features = np.asarray(edge_features)
  f"The initializer {self.__class__.__name__} is unseeded "


Predictions: 100%|█████████████████████████████████████▉| 558/559 [00:25<00:00, 21.49it/s]
The number of molecules in the test set : 559 

randomSeed : 90
R2 Score : 0.9936766507091022
Median Relative Error : 1.2592734691079204 %


  features = np.asarray(features)
  edge_features = np.asarray(edge_features)
  f"The initializer {self.__class__.__name__} is unseeded "


Predictions: 100%|█████████████████████████████████████▉| 558/559 [00:23<00:00, 23.58it/s]
The number of molecules in the test set : 559 

randomSeed : 60
R2 Score : 0.9937601156479846
Median Relative Error : 1.2455721079192623 %


  features = np.asarray(features)
  edge_features = np.asarray(edge_features)
  f"The initializer {self.__class__.__name__} is unseeded "


Predictions: 100%|█████████████████████████████████████▉| 558/559 [00:23<00:00, 24.08it/s]
The number of molecules in the test set : 559 

randomSeed : 32
R2 Score : 0.9936606459112262
Median Relative Error : 1.2222484323981095 %


  features = np.asarray(features)
  edge_features = np.asarray(edge_features)
  f"The initializer {self.__class__.__name__} is unseeded "


Predictions: 100%|█████████████████████████████████████▉| 558/559 [00:24<00:00, 23.05it/s]
The number of molecules in the test set : 559 

randomSeed : 67
R2 Score : 0.9937357415420917
Median Relative Error : 1.2411803751089934 %


  features = np.asarray(features)
  edge_features = np.asarray(edge_features)
  f"The initializer {self.__class__.__name__} is unseeded "


Predictions: 100%|█████████████████████████████████████▉| 558/559 [00:17<00:00, 32.64it/s]
The number of molecules in the test set : 559 

randomSeed : 49
R2 Score : 0.9938092300936576
Median Relative Error : 1.2320123638785607 %


  features = np.asarray(features)
  edge_features = np.asarray(edge_features)
  f"The initializer {self.__class__.__name__} is unseeded "


Predictions: 100%|█████████████████████████████████████▉| 558/559 [00:23<00:00, 23.57it/s]
The number of molecules in the test set : 559 

randomSeed : 46
R2 Score : 0.9937681117449658
Median Relative Error : 1.2091947637048417 %


  features = np.asarray(features)
  edge_features = np.asarray(edge_features)
  f"The initializer {self.__class__.__name__} is unseeded "


Predictions: 100%|█████████████████████████████████████▉| 558/559 [00:24<00:00, 23.02it/s]
The number of molecules in the test set : 559 

randomSeed : 13
R2 Score : 0.9938110605411836
Median Relative Error : 1.2039685732496903 %


  features = np.asarray(features)
  edge_features = np.asarray(edge_features)
  f"The initializer {self.__class__.__name__} is unseeded "


Predictions: 100%|█████████████████████████████████████▉| 558/559 [00:24<00:00, 22.35it/s]
The number of molecules in the test set : 559 

randomSeed : 36
R2 Score : 0.9937978573201155
Median Relative Error : 1.1657171031450995 %


  features = np.asarray(features)
  edge_features = np.asarray(edge_features)
  f"The initializer {self.__class__.__name__} is unseeded "


Predictions: 100%|█████████████████████████████████████▉| 558/559 [00:17<00:00, 31.31it/s]
The number of molecules in the test set : 559 

randomSeed : 69
R2 Score : 0.9936400851120422
Median Relative Error : 1.2538308028427638 %


  features = np.asarray(features)
  edge_features = np.asarray(edge_features)
  f"The initializer {self.__class__.__name__} is unseeded "


Predictions: 100%|█████████████████████████████████████▉| 558/559 [00:24<00:00, 22.51it/s]
The number of molecules in the test set : 559 

randomSeed : 52
R2 Score : 0.9938545988573427
Median Relative Error : 1.2199781385712083 %


  features = np.asarray(features)
  edge_features = np.asarray(edge_features)
  f"The initializer {self.__class__.__name__} is unseeded "


Predictions: 100%|█████████████████████████████████████▉| 558/559 [00:24<00:00, 22.92it/s]
The number of molecules in the test set : 559 

randomSeed : 37
R2 Score : 0.9938509774176151
Median Relative Error : 1.1873798209848483 %


  features = np.asarray(features)
  edge_features = np.asarray(edge_features)
  f"The initializer {self.__class__.__name__} is unseeded "


Predictions: 100%|█████████████████████████████████████▉| 558/559 [00:10<00:00, 54.86it/s]
The number of molecules in the test set : 559 

randomSeed : 35
R2 Score : 0.9938795558837294
Median Relative Error : 1.2301139283978768 %


  features = np.asarray(features)
  edge_features = np.asarray(edge_features)
  f"The initializer {self.__class__.__name__} is unseeded "


Predictions: 100%|█████████████████████████████████████▉| 558/559 [00:09<00:00, 55.86it/s]
The number of molecules in the test set : 559 

randomSeed : 91
R2 Score : 0.9938147113891018
Median Relative Error : 1.1872586468987925 %


In [5]:
# Aggregate the per-seed metrics: arithmetic mean and sample standard
# deviation (statistics.stdev) of the R2 scores and median relative errors.
r2_mean, r2_std = statistics.mean(R2), statistics.stdev(R2)
MeRE_mean, MeRE_std = statistics.mean(MeRE), statistics.stdev(MeRE)

# R2 summary
print("the mean value of R2 :", round(r2_mean, 4))
print("the standard deviation of R2 :", round(r2_std, 5))

# Median relative error summary (values are percentages)
print("the mean value of Median Relative Error :", round(MeRE_mean, 3), '%')
print("the standard deviation of Median Relative Error :", round(MeRE_std, 4), '%')

the mean value of R2 : 0.9938
the standard deviation of R2 : 8e-05
the mean value of Median Relative Error : 1.226 %
the standard deviation of Median Relative Error : 0.0269 %


In [6]:
# Per-seed report: the three result lists were filled in lockstep, so they
# are walked together; metrics are rounded for display only.
for seed, r2_score_value, mere_value in zip(randomSeeds, R2, MeRE):
    print("randomSeed :", seed)
    print("R2 Score :", round(r2_score_value, 4))
    print("Median Relative Error :", round(mere_value, 3), '%')

randomSeed : 45
R2 Score : 0.9938
Median Relative Error : 1.204 %
randomSeed : 34
R2 Score : 0.9938
Median Relative Error : 1.203 %
randomSeed : 43
R2 Score : 0.9938
Median Relative Error : 1.278 %
randomSeed : 80
R2 Score : 0.9936
Median Relative Error : 1.231 %
randomSeed : 68
R2 Score : 0.9936
Median Relative Error : 1.22 %
randomSeed : 92
R2 Score : 0.9939
Median Relative Error : 1.183 %
randomSeed : 88
R2 Score : 0.9938
Median Relative Error : 1.212 %
randomSeed : 41
R2 Score : 0.9938
Median Relative Error : 1.247 %
randomSeed : 86
R2 Score : 0.9937
Median Relative Error : 1.222 %
randomSeed : 98
R2 Score : 0.9937
Median Relative Error : 1.255 %
randomSeed : 87
R2 Score : 0.9937
Median Relative Error : 1.218 %
randomSeed : 82
R2 Score : 0.9937
Median Relative Error : 1.272 %
randomSeed : 21
R2 Score : 0.9938
Median Relative Error : 1.255 %
randomSeed : 83
R2 Score : 0.9938
Median Relative Error : 1.23 %
randomSeed : 31
R2 Score : 0.9939
Median Relative Error : 1.206 %
randomSeed :

# Performance of SigmaCCS on the external test set with different coordinates generated by ETKDG and MMFF94

In [7]:
# Inputs for the external test-set experiment (paths are relative to this notebook).
ifile = "../data/ExternalTestData.csv"           # CSV handed to read_data()
ParameterPath = "../parameter/parameter.pkl"     # pickled parameter object
mfileh5 = "../model/model_new.h5"                # model file handed to load_Model_from_file()

In [8]:
# Evaluate the model on the external test set once per ETKDG random seed and
# collect the metrics of every run in R2 / MeRE / randomSeeds.

# 30 distinct seeds drawn from 1..99 (sampling without replacement).
res = random.sample(range(1, 100), 30)
R2 = []
MeRE = []
randomSeeds = []

# The raw data, the parameter object and the trained model do not depend on
# the seed, so they are loaded once instead of once per repetition.
all_smiles, all_adduct, all_ccs = read_data(ifile)
# NOTE(review): unpickling can execute arbitrary code; only load a trusted parameter file.
with open(ParameterPath, 'rb') as file:
    param = pickle.load(file)
ECC_Model = load_Model_from_file(mfileh5)
coor_range = param.Max_Coor - param.Min_Coor

for a in res:
    # Only molecules for which a conformer could be generated are returned.
    smiles, adduct, ccs, Coordinate = Generating_coordinates_different_seeds(
        all_smiles, all_adduct, all_ccs, a, param.All_Atoms)

    # Min-max scale the coordinates with the bounds stored in the parameter object.
    Coordinate = [(np.array(xyz) - param.Min_Coor) / coor_range for xyz in Coordinate]

    adj, features, edge_features = convertToGraph(smiles, Coordinate, param.All_Atoms)
    DataSet = MyDataset(features, adj, edge_features, ccs)

    predictions = predict(ECC_Model, param.adduct_SET, DataSet, adduct)
    print("The number of molecules in the test set :", len(predictions), '\n')

    # SigmaPer[0] is reported as the R2 score, SigmaPer[1] as the median relative error (%).
    SigmaPer = Metrics(ccs, predictions)
    print("randomSeed :", a)
    print("R2 Score :", SigmaPer[0])
    print("Median Relative Error :", SigmaPer[1], '%')

    R2.append(SigmaPer[0])
    MeRE.append(SigmaPer[1])
    randomSeeds.append(a)

  features = np.asarray(features)
  edge_features = np.asarray(edge_features)
  f"The initializer {self.__class__.__name__} is unseeded "


Predictions: 100%|█████████████████████████████████████▉| 343/344 [00:05<00:00, 62.32it/s]
The number of molecules in the test set : 344 

randomSeed : 97
R2 Score : 0.9792576132103041
Median Relative Error : 1.871191665827953 %


  features = np.asarray(features)
  edge_features = np.asarray(edge_features)
  f"The initializer {self.__class__.__name__} is unseeded "


Predictions: 100%|█████████████████████████████████████▉| 343/344 [00:05<00:00, 65.45it/s]
The number of molecules in the test set : 344 

randomSeed : 22
R2 Score : 0.9793375253036047
Median Relative Error : 1.8811994244720718 %


  features = np.asarray(features)
  edge_features = np.asarray(edge_features)
  f"The initializer {self.__class__.__name__} is unseeded "


Predictions: 100%|█████████████████████████████████████▉| 343/344 [00:05<00:00, 65.14it/s]
The number of molecules in the test set : 344 

randomSeed : 45
R2 Score : 0.9798463606673514
Median Relative Error : 1.8922467640096974 %


  features = np.asarray(features)
  edge_features = np.asarray(edge_features)
  f"The initializer {self.__class__.__name__} is unseeded "


Predictions: 100%|█████████████████████████████████████▉| 343/344 [00:05<00:00, 63.00it/s]
The number of molecules in the test set : 344 

randomSeed : 91
R2 Score : 0.9795149057932717
Median Relative Error : 1.891354330104663 %


  features = np.asarray(features)
  edge_features = np.asarray(edge_features)
  f"The initializer {self.__class__.__name__} is unseeded "


Predictions: 100%|█████████████████████████████████████▉| 343/344 [00:05<00:00, 63.39it/s]
The number of molecules in the test set : 344 

randomSeed : 78
R2 Score : 0.9798731884971258
Median Relative Error : 1.8710280832061295 %


  features = np.asarray(features)
  edge_features = np.asarray(edge_features)
  f"The initializer {self.__class__.__name__} is unseeded "


Predictions: 100%|█████████████████████████████████████▉| 343/344 [00:05<00:00, 63.57it/s]
The number of molecules in the test set : 344 

randomSeed : 80
R2 Score : 0.9796024313458452
Median Relative Error : 1.954111069712016 %


  features = np.asarray(features)
  edge_features = np.asarray(edge_features)
  f"The initializer {self.__class__.__name__} is unseeded "


Predictions: 100%|█████████████████████████████████████▉| 343/344 [00:05<00:00, 64.37it/s]
The number of molecules in the test set : 344 

randomSeed : 93
R2 Score : 0.9797033567297349
Median Relative Error : 1.953890904108012 %


  features = np.asarray(features)
  edge_features = np.asarray(edge_features)
  f"The initializer {self.__class__.__name__} is unseeded "


Predictions: 100%|█████████████████████████████████████▉| 343/344 [00:05<00:00, 64.06it/s]
The number of molecules in the test set : 344 

randomSeed : 3
R2 Score : 0.9798650384961817
Median Relative Error : 1.8781685159614745 %


  features = np.asarray(features)
  edge_features = np.asarray(edge_features)
  f"The initializer {self.__class__.__name__} is unseeded "


Predictions: 100%|█████████████████████████████████████▉| 343/344 [00:05<00:00, 64.06it/s]
The number of molecules in the test set : 344 

randomSeed : 50
R2 Score : 0.9792594361555591
Median Relative Error : 2.00598148678338 %


  features = np.asarray(features)
  edge_features = np.asarray(edge_features)
  f"The initializer {self.__class__.__name__} is unseeded "


Predictions: 100%|█████████████████████████████████████▉| 343/344 [00:05<00:00, 64.07it/s]
The number of molecules in the test set : 344 

randomSeed : 28
R2 Score : 0.979158296432702
Median Relative Error : 1.9685103195683284 %


  features = np.asarray(features)
  edge_features = np.asarray(edge_features)
  f"The initializer {self.__class__.__name__} is unseeded "


Predictions: 100%|█████████████████████████████████████▉| 343/344 [00:05<00:00, 62.10it/s]
The number of molecules in the test set : 344 

randomSeed : 76
R2 Score : 0.9802473035458075
Median Relative Error : 1.8707653587724158 %


  features = np.asarray(features)
  edge_features = np.asarray(edge_features)
  f"The initializer {self.__class__.__name__} is unseeded "


Predictions: 100%|█████████████████████████████████████▉| 343/344 [00:05<00:00, 62.59it/s]
The number of molecules in the test set : 344 

randomSeed : 6
R2 Score : 0.9797443899891197
Median Relative Error : 1.9761153965484195 %


  features = np.asarray(features)
  edge_features = np.asarray(edge_features)
  f"The initializer {self.__class__.__name__} is unseeded "


Predictions: 100%|█████████████████████████████████████▉| 343/344 [00:05<00:00, 64.64it/s]
The number of molecules in the test set : 344 

randomSeed : 15
R2 Score : 0.9792707291134716
Median Relative Error : 2.007216772223062 %


  features = np.asarray(features)
  edge_features = np.asarray(edge_features)
  f"The initializer {self.__class__.__name__} is unseeded "


Predictions: 100%|█████████████████████████████████████▉| 343/344 [00:05<00:00, 62.82it/s]
The number of molecules in the test set : 344 

randomSeed : 51
R2 Score : 0.9796338821313076
Median Relative Error : 1.9392233003012453 %


  features = np.asarray(features)
  edge_features = np.asarray(edge_features)
  f"The initializer {self.__class__.__name__} is unseeded "


Predictions: 100%|█████████████████████████████████████▉| 343/344 [00:05<00:00, 62.89it/s]
The number of molecules in the test set : 344 

randomSeed : 66
R2 Score : 0.9797011101616764
Median Relative Error : 1.9284416454511164 %


  features = np.asarray(features)
  edge_features = np.asarray(edge_features)
  f"The initializer {self.__class__.__name__} is unseeded "


Predictions: 100%|█████████████████████████████████████▉| 343/344 [00:05<00:00, 64.92it/s]
The number of molecules in the test set : 344 

randomSeed : 79
R2 Score : 0.9801762051108515
Median Relative Error : 1.900089657848237 %


  features = np.asarray(features)
  edge_features = np.asarray(edge_features)
  f"The initializer {self.__class__.__name__} is unseeded "


Predictions: 100%|█████████████████████████████████████▉| 343/344 [00:05<00:00, 65.04it/s]
The number of molecules in the test set : 344 

randomSeed : 12
R2 Score : 0.9799317991273397
Median Relative Error : 1.9175556064946535 %


  features = np.asarray(features)
  edge_features = np.asarray(edge_features)
  f"The initializer {self.__class__.__name__} is unseeded "


Predictions: 100%|█████████████████████████████████████▉| 343/344 [00:05<00:00, 65.32it/s]
The number of molecules in the test set : 344 

randomSeed : 49
R2 Score : 0.9798158159939795
Median Relative Error : 1.9511861130801142 %


  features = np.asarray(features)
  edge_features = np.asarray(edge_features)
  f"The initializer {self.__class__.__name__} is unseeded "


Predictions: 100%|█████████████████████████████████████▉| 343/344 [00:05<00:00, 65.88it/s]
The number of molecules in the test set : 344 

randomSeed : 2
R2 Score : 0.9794762637337676
Median Relative Error : 1.9043251765587683 %


  features = np.asarray(features)
  edge_features = np.asarray(edge_features)
  f"The initializer {self.__class__.__name__} is unseeded "


Predictions: 100%|█████████████████████████████████████▉| 343/344 [00:05<00:00, 66.15it/s]
The number of molecules in the test set : 344 

randomSeed : 21
R2 Score : 0.9795188216508944
Median Relative Error : 1.866909841923007 %


  features = np.asarray(features)
  edge_features = np.asarray(edge_features)
  f"The initializer {self.__class__.__name__} is unseeded "


Predictions: 100%|█████████████████████████████████████▉| 343/344 [00:05<00:00, 63.21it/s]
The number of molecules in the test set : 344 

randomSeed : 33
R2 Score : 0.9794073082441926
Median Relative Error : 1.9309082635004235 %


  features = np.asarray(features)
  edge_features = np.asarray(edge_features)
  f"The initializer {self.__class__.__name__} is unseeded "


Predictions: 100%|█████████████████████████████████████▉| 343/344 [00:05<00:00, 63.87it/s]
The number of molecules in the test set : 344 

randomSeed : 16
R2 Score : 0.9795971815747104
Median Relative Error : 1.8672512268421717 %


  features = np.asarray(features)
  edge_features = np.asarray(edge_features)
  f"The initializer {self.__class__.__name__} is unseeded "


Predictions: 100%|█████████████████████████████████████▉| 343/344 [00:05<00:00, 62.92it/s]
The number of molecules in the test set : 344 

randomSeed : 7
R2 Score : 0.9800257435596267
Median Relative Error : 1.9620538572171269 %


  features = np.asarray(features)
  edge_features = np.asarray(edge_features)
  f"The initializer {self.__class__.__name__} is unseeded "


Predictions: 100%|█████████████████████████████████████▉| 343/344 [00:05<00:00, 63.80it/s]
The number of molecules in the test set : 344 

randomSeed : 69
R2 Score : 0.9793057629911172
Median Relative Error : 1.9798182953168395 %


  features = np.asarray(features)
  edge_features = np.asarray(edge_features)
  f"The initializer {self.__class__.__name__} is unseeded "


Predictions: 100%|█████████████████████████████████████▉| 343/344 [00:05<00:00, 63.59it/s]
The number of molecules in the test set : 344 

randomSeed : 14
R2 Score : 0.979975310607948
Median Relative Error : 1.9945601826554902 %


  features = np.asarray(features)
  edge_features = np.asarray(edge_features)
  f"The initializer {self.__class__.__name__} is unseeded "


Predictions: 100%|█████████████████████████████████████▉| 343/344 [00:05<00:00, 63.20it/s]
The number of molecules in the test set : 344 

randomSeed : 60
R2 Score : 0.9795083090177947
Median Relative Error : 1.9292675343913497 %


  features = np.asarray(features)
  edge_features = np.asarray(edge_features)
  f"The initializer {self.__class__.__name__} is unseeded "


Predictions: 100%|█████████████████████████████████████▉| 343/344 [00:05<00:00, 62.54it/s]
The number of molecules in the test set : 344 

randomSeed : 87
R2 Score : 0.9795895438255904
Median Relative Error : 1.9651177686386447 %


  features = np.asarray(features)
  edge_features = np.asarray(edge_features)
  f"The initializer {self.__class__.__name__} is unseeded "


Predictions: 100%|█████████████████████████████████████▉| 343/344 [00:05<00:00, 64.42it/s]
The number of molecules in the test set : 344 

randomSeed : 13
R2 Score : 0.9797374382133336
Median Relative Error : 1.9189949113280895 %


  features = np.asarray(features)
  edge_features = np.asarray(edge_features)
  f"The initializer {self.__class__.__name__} is unseeded "


Predictions: 100%|█████████████████████████████████████▉| 343/344 [00:05<00:00, 64.69it/s]
The number of molecules in the test set : 344 

randomSeed : 65
R2 Score : 0.9795750322466021
Median Relative Error : 1.9842256685621396 %


  features = np.asarray(features)
  edge_features = np.asarray(edge_features)
  f"The initializer {self.__class__.__name__} is unseeded "


Predictions: 100%|█████████████████████████████████████▉| 343/344 [00:05<00:00, 65.08it/s]
The number of molecules in the test set : 344 

randomSeed : 62
R2 Score : 0.9793332338379265
Median Relative Error : 1.950404333516613 %


In [9]:
# Aggregate the per-seed metrics of the external test set: arithmetic mean and
# sample standard deviation (statistics.stdev) of R2 and median relative error.
r2_mean, r2_std = statistics.mean(R2), statistics.stdev(R2)
MeRE_mean, MeRE_std = statistics.mean(MeRE), statistics.stdev(MeRE)

# R2 summary
print("the mean value of R2 :", round(r2_mean, 4))
print("the standard deviation of R2 :", round(r2_std, 5))

# Median relative error summary (values are percentages)
print("the mean value of Median Relative Error :", round(MeRE_mean, 3), '%')
print("the standard deviation of Median Relative Error :", round(MeRE_std, 4), '%')

the mean value of R2 : 0.9796
the standard deviation of R2 : 0.00028
the mean value of Median Relative Error : 1.93 %
the standard deviation of Median Relative Error : 0.0445 %


In [10]:
# Per-seed report for the external test set: the three result lists were
# filled in lockstep, so they are walked together. The trailing '\n' argument
# leaves a blank line between consecutive seeds.
for seed, r2_score_value, mere_value in zip(randomSeeds, R2, MeRE):
    print("randomSeed :", seed)
    print("R2 Score :", round(r2_score_value, 4))
    print("Median Relative Error :", round(mere_value, 3), '%', '\n')

randomSeed : 97
R2 Score : 0.9793
Median Relative Error : 1.871 % 

randomSeed : 22
R2 Score : 0.9793
Median Relative Error : 1.881 % 

randomSeed : 45
R2 Score : 0.9798
Median Relative Error : 1.892 % 

randomSeed : 91
R2 Score : 0.9795
Median Relative Error : 1.891 % 

randomSeed : 78
R2 Score : 0.9799
Median Relative Error : 1.871 % 

randomSeed : 80
R2 Score : 0.9796
Median Relative Error : 1.954 % 

randomSeed : 93
R2 Score : 0.9797
Median Relative Error : 1.954 % 

randomSeed : 3
R2 Score : 0.9799
Median Relative Error : 1.878 % 

randomSeed : 50
R2 Score : 0.9793
Median Relative Error : 2.006 % 

randomSeed : 28
R2 Score : 0.9792
Median Relative Error : 1.969 % 

randomSeed : 76
R2 Score : 0.9802
Median Relative Error : 1.871 % 

randomSeed : 6
R2 Score : 0.9797
Median Relative Error : 1.976 % 

randomSeed : 15
R2 Score : 0.9793
Median Relative Error : 2.007 % 

randomSeed : 51
R2 Score : 0.9796
Median Relative Error : 1.939 % 

randomSeed : 66
R2 Score : 0.9797
Median Relative 