In [1]:
# coding: utf-8

# In[1]:


# rdkit
from rdkit import Chem
from rdkit.Chem import AllChem, MACCSkeys, RDKFingerprint, Descriptors, rdMolDescriptors, Lipinski
from rdkit.Chem.AtomPairs import Pairs, Torsions
from rdkit.Avalon import pyAvalonTools
from rdkit.Chem.EState import Fingerprinter
from rdkit.Chem import rdFingerprintGenerator

# sklearn
from sklearn import svm
from sklearn.model_selection import train_test_split

from sklearn.metrics import confusion_matrix, classification_report, accuracy_score, f1_score, precision_score, \
    recall_score, cohen_kappa_score
from sklearn.preprocessing import MinMaxScaler
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE

# Standard libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import glob
import os

# typing
from typing import Union

# connecting to chembl through web client
from chembl_webresource_client.new_client import new_client

from datetime import datetime
import shutil

from datetime import datetime
import os
from concurrent.futures import ProcessPoolExecutor, as_completed

import shutil
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn import svm
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
import numpy as np
import time

from joblib import dump
import pickle

from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import LinearSVC, SVC
from sklearn.model_selection import train_test_split

# In[2]:

from sklearn.ensemble import GradientBoostingClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.dummy import DummyClassifier
from sklearn.linear_model import PassiveAggressiveClassifier
from sklearn.linear_model import Perceptron
from sklearn.linear_model import SGDClassifier



In [2]:
def get_smiles_from_drug_names(drug_list, new_client, return_type='list'):
    molecule = new_client.molecule
    smiles_list = []
    drug_names = []

    for drug in drug_list:
        res = molecule.search(drug)

        if res:
            smiles_list.append(res[0]['molecule_structures']['canonical_smiles'])
            drug_names.append(drug)

    # Check return type
    if return_type == 'dataframe':
        # Convert list to DataFrame
        smiles_df = pd.DataFrame({
            'drug_name': drug_names,
            'smiles': smiles_list
        })
        return smiles_df
    elif return_type == 'list':
        return smiles_list
    else:
        raise ValueError("Invalid return_type. Expected 'dataframe' or 'list'.")


# In[3]:


def data_training(path: str, label: Union[int, str]) -> pd.DataFrame:
    """Loads and combines CSV files from a directory, assigns class labels, 
    removes duplicates, and returns a DataFrame.

    Args:
        path (str): Directory path containing the CSV files.
        label (Union[int, str]): Label value to be assigned as class 1.

    Returns:
        pd.DataFrame: DataFrame containing the 'smiles' column and class labels.

    Raises:
        ValueError: If the label is not in the valid range.
    """
    # Number of CSV files in the directory
    number_of_labels = len(glob.glob(f'{path}/*.csv'))

    # Convert label to string and pad with zeros if necessary
    label_str = str(label).zfill(2)

    # Validate label
    if len(label_str) > 2 or not 0 <= int(label_str) <= number_of_labels:
        raise ValueError(f"The label must be between 0 and {number_of_labels}.")

    # Label file path
    label_file = f"{path}/00_label_{label_str}.csv"

    # List all CSV files in the directory
    csv_files = glob.glob(f'{path}/*.csv')

    # Remove the label file from the list
    csv_files.remove(label_file)

    # Load the label file into a DataFrame and assign class 1
    df_positive = pd.read_csv(label_file)[['smiles']]
    if not df_positive.empty:
        df_positive['y'] = 1

        # Load the other CSV files into a DataFrame and assign class 0
        df_negative = pd.concat([pd.read_csv(file)[['smiles']] for file in csv_files])
        df_negative['y'] = 0

        # Combine the two DataFrames
        df = pd.concat([df_positive, df_negative])

        # Remove duplicates
        df.drop_duplicates(subset='smiles', inplace=True)

    else:
        df = pd.DataFrame()

    return df


# In[4]:


def apply_descriptors_and_fp(df: pd.DataFrame,
                             label_column: str,
                             smiles_column: str,
                             fp_method: str,
                             use_fingerprint: bool = True,
                             use_descriptors: bool = True) -> Union[np.ndarray, np.ndarray]:
    """Applies descriptor and fingerprint methods to a DataFrame and returns scaled descriptors and labels as numpy arrays.

    Args:
        df (pd.DataFrame): DataFrame containing the SMILES strings and labels.
        label_column (str): Name of the label column.
        smiles_column (str): Name of the SMILES column.
        fp_method (str): Name of the fingerprint method to apply.
        use_descriptors (bool): Whether or not to include descriptors in the output.
        use_fingerprint (bool): Whether or not to include fingerprint in the output.

    Returns:
        X (np.ndarray): Descriptor and/or fingerprint values depending on use_descriptors and use_fingerprint flags.
        Y (np.ndarray): Labels.
    """
    descriptor_methods_dict = {
        'MolWt': Descriptors.MolWt,
        'HeavyAtomCount': Descriptors.HeavyAtomCount,
        'NumHDonors': Descriptors.NumHDonors,
        'NumHAcceptors': Descriptors.NumHAcceptors,
        'MolLogP': Descriptors.MolLogP,
        'TPSA': rdMolDescriptors.CalcTPSA,
        'NumRotatableBonds': Lipinski.NumRotatableBonds,
        'NumAromaticRings': Descriptors.NumAromaticRings,
        'NumAliphaticRings': Descriptors.NumAliphaticRings,
    }
    fp_methods_dict = {
        'RDKFingerprint': RDKFingerprint,
        'MorganFingerprint': lambda m: AllChem.GetMorganFingerprintAsBitVect(m, 2),
        'MACCSkeys': MACCSkeys.GenMACCSKeys,
        'AvalonFP': pyAvalonTools.GetAvalonFP,
        'Gobbi-Paalman': rdMolDescriptors.GetHashedTopologicalTorsionFingerprintAsBitVect,
    }

    # Validate the fingerprint method
    if fp_method not in fp_methods_dict:
        raise ValueError(f"Invalid fingerprint method. Choices are: {list(fp_methods_dict.keys())}")

    # Convert SMILES to RDKit molecules
    df['Molecule'] = df[smiles_column].apply(Chem.MolFromSmiles)

    # Apply the descriptor methods and scale them if use_descriptors is True
    if use_descriptors:
        for descriptor, method in descriptor_methods_dict.items():
            df[descriptor] = df['Molecule'].apply(method)
        scaler = MinMaxScaler()
        df[list(descriptor_methods_dict.keys())] = scaler.fit_transform(df[list(descriptor_methods_dict.keys())])

    # Apply the fingerprint method if use_fingerprint is True
    if use_fingerprint:
        df[fp_method] = df['Molecule'].apply(fp_methods_dict[fp_method])
        df[f'np_{fp_method}'] = df[fp_method].apply(
            lambda fingerprints: np.array(list(map(int, fingerprints.ToBitString()))))
        X_fp = np.stack(df[f'np_{fp_method}'].values)

    # Create numpy array for descriptor data if use_descriptors is True
    if use_descriptors and use_fingerprint:
        X_descr = df[list(descriptor_methods_dict.keys())].values
        # Concatenate along columns axis (assuming fingerprint is 1D)
        X = np.hstack((X_descr, X_fp))
    elif use_descriptors:
        X = df[list(descriptor_methods_dict.keys())].values
    elif use_fingerprint:
        X = X_fp
    else:
        raise ValueError("At least one of use_descriptors or use_fingerprint must be True")

    # Create numpy array for label data
    if label_column:
        Y = df[label_column].values
    else:
        Y = None

    return X, Y, df




In [3]:
# In[5]:


drug_list_to_exclude = [
    "Amodiaquine",
    "Crizotinib",
    "Mebhydrolin",
    "Harringtonin",
    "Fluphenazine",
    "Fingolimod",
    "Fendiline",
    "Dronedarone",
    "Perphenazine",
    "Pimavanserin",
    "Prochlorperazine",
    "Raloxifene",
    "Abemaciclib",
    "Mefloquine",
    "Revaprazan",
    "Melitracen",
    "Nelfinavir",
    "Nicardipine",
    "Nilotinib",
    "Olmutinib",
    "harringtonin",
    "Terconazole",
    "Thioridazine",
    "Thiothixene",
    "Tilorone-dihydrochloride",
    "Triflupromazine"
]

smiles_list = get_smiles_from_drug_names(drug_list_to_exclude, new_client, return_type='list')
smiles_list

# In[6]:


smiles_dataframe = get_smiles_from_drug_names(drug_list_to_exclude, new_client, return_type='dataframe')
smiles_dataframe

# In[ ]:


now = datetime.now()
folder_name = now.strftime("%Y_%b_%d") + str('_code_classifier')
folder_name = folder_name.lower()
if os.path.isdir(folder_name):
    shutil.rmtree(folder_name)
os.makedirs(folder_name)

df_label = pd.read_csv('label-all-new.csv', header=None)
df_label.reset_index(inplace=True)
df_label.rename(columns={'index': 'label', 0: 'location'}, inplace=True)
df_label = df_label[['label', 'location']]

# Fingerprint methods list
fp_method_list = ['RDKFingerprint']

# Create dictionary to store results
smiles_prediction_results = {}

# Results dictionary
result_dict = {}

start = time.time()
ml_methods = [
    {'name': 'Random Forest', 'classifier': RandomForestClassifier()},
    {'name': 'Logistic Regression', 'classifier': LogisticRegression()},
    {'name': 'Gradient Boosting', 'classifier': GradientBoostingClassifier()},
    {'name': 'Ada Boost', 'classifier': AdaBoostClassifier()},
#     {'name': 'Dummy', 'classifier': DummyClassifier()},
#     {'name': 'Passive Aggressive', 'classifier': PassiveAggressiveClassifier(max_iter=1000)},
    {'name': 'Perceptron', 'classifier': Perceptron()},
    {'name': 'SGD', 'classifier': SGDClassifier()},
    {'name': 'SVC', 'classifier': SVC()},
    {'name': 'Linear SVC', 'classifier': LinearSVC()},
]

number_of_files = len([f for f in os.listdir('final_label_to_train_LV') if os.path.isfile(os.path.join('final_label_to_train_LV', f))])

for label_location in range(number_of_files):
    print(label_location)
    df = data_training('final_label_to_train_LV', label_location)
    if not df.empty:
        df = df[~df['smiles'].isin(smiles_list)]
        location = df_label.loc[df_label['label'] == label_location, 'location'].values[0]
        for finger_print_method in fp_method_list:
            for descriptor_bool in [True,False]:
                for finger_print_bool in [True]:
                    if finger_print_bool or descriptor_bool:
                        X, Y, df = apply_descriptors_and_fp(df, 'y', 'smiles',
                                                            finger_print_method,
                                                            finger_print_bool,
                                                            descriptor_bool)
                        number_of_zeros = sum([1 for i in Y if i == 0])
                        number_of_ones = sum([1 for i in Y if i == 1])

                        zero_ratio = round(max(number_of_zeros, number_of_ones) / number_of_zeros)
                        one_ratio = round(max(number_of_zeros, number_of_ones) / number_of_ones)
                        class_weight = {0: zero_ratio, 1: one_ratio}

                        X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, stratify=Y)

                        # Initialize the SVC with probability=True
                        for method in ml_methods:
                            print(f"Using {method['name']} method...")
                            clf = method['classifier']
                            if 'class_weight' in clf.get_params():
#                                 clf.set_params(class_weight=class_weight)
                                pass
                            if method['name'] == 'Logistic Regression':
                                clf.set_params(max_iter=500)
                            
                            clf.fit(X_train, y_train)

                            y_pred = clf.predict(X_test)

                            # Calculate probabilities for each class
                            if hasattr(clf, 'predict_proba'):
                                # Calculate probabilities for each class
                                probs = clf.predict_proba(X_test)
                                class0_probs = probs[:, 0]
                                class1_probs = probs[:, 1]
                            else:
                                # Set to 'NA' if predict_proba method does not exist
                                class0_probs = 'NA'
                                class1_probs = 'NA'

                            unique_key = f"{method['name']}_{label_location}_{finger_print_method}_{finger_print_bool}_{descriptor_bool}"
                            result_dict[unique_key] = {
                                'method': method['name'],
                                'location': location,
                                'finger_print_method': finger_print_method,
                                'finger_print_bool': finger_print_bool,
                                'descriptor_bool': descriptor_bool,
                                'accuracy': accuracy_score(y_test, y_pred),
                                'kappa': cohen_kappa_score(y_test, y_pred),
                                'f1_score': f1_score(y_test, y_pred, average='weighted'),
                                'f1_score_class_0': f1_score(y_test, y_pred, average=None)[0],
                                'f1_score_class_1': f1_score(y_test, y_pred, average=None)[1],
                                'precision': precision_score(y_test, y_pred, average='weighted'),
                                'recall': recall_score(y_test, y_pred, average='weighted'),
                                'label_1': np.count_nonzero(Y == 1),
                                'label_0': np.count_nonzero(Y == 0),
                                'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
                                'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'
                            }
                            print(f1_score(y_test, y_pred, average=None)[0])
                            print(f1_score(y_test, y_pred, average=None)[1])
                            # Predict on smiles_list and store results
                            smiles_features = apply_descriptors_and_fp(smiles_dataframe, None, 'smiles',
                                                                       finger_print_method,
                                                                       finger_print_bool,
                                                                       descriptor_bool)[0]
                            smiles_pred = clf.predict(smiles_features)
                            if hasattr(clf, 'predict_proba'):
                                smiles_pred_proba = clf.predict_proba(smiles_features)  # Get probabilities

                            else:
                                smiles_pred_proba = np.full((smiles_pred.shape[0], 2), np.nan)
                            for i, prediction in enumerate(smiles_pred):
                                smile = f"{smiles_dataframe['smiles'].iloc[i]}_{label_location}_{finger_print_method}_{finger_print_bool}_{descriptor_bool}"
                                if smile in smiles_prediction_results:
                                    smiles_prediction_results[smile].append(
                                        [label_location, finger_print_method, finger_print_bool, descriptor_bool,
                                         smiles_pred[i], smiles_pred_proba[i, 0], smiles_pred_proba[i, 1]])
                                else:
                                    smiles_prediction_results[smile] = [label_location, finger_print_method,
                                                                        finger_print_bool, descriptor_bool,
                                                                        smiles_pred[i],
                                                                        smiles_pred_proba[i, 0],
                                                                        smiles_pred_proba[i, 1]]

                            folder_path = f'./{folder_name}/{label_location}_{location}/'
                            if not os.path.exists(folder_path):
                                os.makedirs(folder_path)
                            dump(clf,
                                 folder_path + f"{method['name']}_{finger_print_method}_{descriptor_bool}_{finger_print_bool}_model.joblib")
                            np.save(folder_path + f'{finger_print_method}_{descriptor_bool}_{finger_print_bool}_X.npy',
                                    X)
                            np.save(folder_path + f'{finger_print_method}_{descriptor_bool}_{finger_print_bool}_Y.npy',
                                    Y)

#                             np.save(
#                                 folder_path + f'{method["name"]}_{finger_print_method}_{descriptor_bool}_{finger_print_bool}_X_train.npy',
#                                 X_train)
#                             np.save(
#                                 folder_path + f'{method["name"]}_{finger_print_method}_{descriptor_bool}_{finger_print_bool}_Y_train.npy',
#                                 y_train)

#                             np.save(
#                                 folder_path + f'{method["name"]}_{finger_print_method}_{descriptor_bool}_{finger_print_bool}_X_test.npy',
#                                 X_test)
#                             np.save(
#                                 folder_path + f'{method["name"]}_{finger_print_method}_{descriptor_bool}_{finger_print_bool}_Y_test.npy',
#                                 y_test)

                            df.to_csv(
                                folder_path + f'{method["name"]}_{finger_print_method}_{descriptor_bool}_{finger_print_bool}_df.csv',
                                index=False)

    else:
        location = 'nan'
        unique_key = f"{label_location}_{location}"
        result_dict[unique_key] = {
            'method': 'NA',
            'location': location,
            'finger_print_method': 'NA',
            'finger_print_bool': 'NA',
            'kappa':'NA',
            'descriptor_bool': 'NA',
            'accuracy': 'NA',
            'f1_score': 'NA',
            'f1_score_class_0': 'NA',
            'f1_score_class_1': 'NA',
            'precision': 'NA',
            'recall': 'NA'
        }

df_result = pd.DataFrame.from_dict(result_dict, orient='index')
end = start - time.time()

# In[9]:


df_result.to_csv('final_result_classifier.csv', index=False)

with open('smiles_prediction_results_classifier', 'wb') as f:
    pickle.dump(smiles_prediction_results, f)

# In[10]:

0
1
Using Random Forest method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.8769405528209011
0.31865828092243187
Using Logistic Regression method...


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.871853986551393
0.3530552861299709
Using Gradient Boosting method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.9035183170112441
0.2631578947368421
Using Ada Boost method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.8991994177583698
0.2513513513513513
Using Perceptron method...
0.672463768115942
0.3530534351145039
Using SGD method...
0.8789473684210527
0.29694323144104806
Using SVC method...
0.9064433927921369
0.307277628032345
Using Linear SVC method...




0.8549588396707173
0.3474426807760141
Using Random Forest method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.88306299509619
0.33618843683083516
Using Logistic Regression method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.8673724735322426
0.3381364073006724
Using Gradient Boosting method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.9006526468455403
0.2388888888888889
Using Ada Boost method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.8954296160877513
0.25326370757180156
Using Perceptron method...
0.8891284815813117
0.0804769001490313
Using SGD method...
0.8770476369798531
0.29405405405405405
Using SVC method...
0.906472196900638
0.31691078561917446
Using Linear SVC method...




0.8916103089014805
0.22483660130718955
2
Using Random Forest method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.8590524867308825
0.3759791122715405
Using Logistic Regression method...


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.8400400400400401
0.3561643835616438
Using Gradient Boosting method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.8776545762074798
0.2885245901639344
Using Ada Boost method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.8669430833019224
0.24086021505376343
Using Perceptron method...
0.8086003372681283
0.3914209115281501
Using SGD method...
0.8113522537562604
0.37396121883656513
Using SVC method...
0.8817975830815711
0.33404255319148934
Using Linear SVC method...




0.6962424242424242
0.4064424443391757
Using Random Forest method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.8501976284584981
0.3554421768707483
Using Logistic Regression method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.8448620551779289
0.37115072933549437
Using Gradient Boosting method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.8772855796418473
0.3007518796992481
Using Ada Boost method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.8674881516587677
0.2726326742976067
Using Perceptron method...
0.5718216840946615
0.40199846272098383
Using SGD method...
0.7602708803611739
0.4119601328903654
Using SVC method...
0.8793332070467893
0.3343782654127481
Using Linear SVC method...




0.8088142707240294
0.3806934058463631
3
Using Random Forest method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.9843495272253016
0.0588235294117647
Using Logistic Regression method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.9797930014785609
0.174496644295302
Using Gradient Boosting method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.9866623292127521
0.06818181818181819
Using Ada Boost method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.9865062591448545
0.02352941176470588
Using Perceptron method...
0.9701294390972452
0.14285714285714288
Using SGD method...
0.9765831134564644
0.17441860465116282
Using SVC method...
0.9874898456539398
0.04938271604938272
Using Linear SVC method...




0.9732054250744293
0.14736842105263157
Using Random Forest method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.9866536458333333
0.10869565217391305
Using Logistic Regression method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.9824618914931977
0.2074074074074074
Using Gradient Boosting method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.9871482023751423
0.11235955056179776
Using Ada Boost method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.9868142601334852
0.12903225806451613
Using Perceptron method...
0.9779532741033234
0.1518987341772152
Using SGD method...
0.9695760598503741
0.17194570135746606
Using SVC method...
0.9876503087422814
0.07317073170731708
Using Linear SVC method...




0.9747483083016999
0.13559322033898305
4
Using Random Forest method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.8217659137577003
0.3645680819912152
Using Logistic Regression method...


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.8226979837871544
0.4014035087719298
Using Gradient Boosting method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.8527666092284129
0.24086870681145114
Using Ada Boost method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.8424936880947757
0.25390984360625574
Using Perceptron method...
0.39610607586438407
0.44765121277249
Using SGD method...
0.7108818472119872
0.45635103926097
Using SVC method...
0.8599651365485184
0.32618825722274
Using Linear SVC method...




0.8389261744966443
0.3025641025641026
Using Random Forest method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.8207008964955175
0.3373493975903615
Using Logistic Regression method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.8154362416107381
0.4005449591280654
Using Gradient Boosting method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.8531441717791411
0.24901960784313726
Using Ada Boost method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.8381211180124223
0.23062730627306274
Using Perceptron method...
0.8220834172879308
0.3063629222309505
Using SGD method...
0.6872202884137245
0.4317976513098465
Using SVC method...
0.859305259072385
0.3305632502308402
Using Linear SVC method...




0.8031329381879764
0.3849206349206349
5
Using Random Forest method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.4653579676674365
0.7944049733570161
Using Logistic Regression method...


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.4720041862899006
0.7667052023121387
Using Gradient Boosting method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.3181818181818182
0.8091133004926109
Using Ada Boost method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.3178991015894955
0.7939026936730006
Using Perceptron method...
0.5238866396761135
0.6877323420074349
Using SGD method...
0.4914549653579676
0.7295504789977892
Using SVC method...
0.4149432955303536
0.8148617268313278
Using Linear SVC method...




0.36504854368932044
0.7908761458111276
Using Random Forest method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.44825614636935396
0.7849342545130377
Using Logistic Regression method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.46153846153846156
0.7597402597402597
Using Gradient Boosting method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.33216045038705144
0.8029075804776739
Using Ada Boost method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.31935047361299057
0.7885666246321985
Using Perceptron method...
0.5287090558766859
0.6641032683328756
Using SGD method...
0.5089757127771911
0.5891016200294551
Using SVC method...
0.4074790457769181
0.8038420490928495
Using Linear SVC method...




0.46065659197498693
0.7602501737317583
6
Using Random Forest method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.8694647670388911
0.3493282149712092
Using Logistic Regression method...


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.859591041869523
0.3451407811080836
Using Gradient Boosting method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.8907593778591034
0.22568093385214008
Using Ada Boost method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.8825478645066274
0.2064676616915423
Using Perceptron method...
0.8566399374144339
0.3472840605520926
Using SGD method...
0.7357798165137615
0.38592750533049036
Using SVC method...
0.8940092165898619
0.29099876695437726
Using Linear SVC method...




0.8678736517719569
0.342911877394636
Using Random Forest method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.8787072243346008
0.34631147540983614
Using Logistic Regression method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.8612403100775194
0.33457249070631967
Using Gradient Boosting method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.8911614317019723
0.21578947368421053
Using Ada Boost method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.8857612867848658
0.1830065359477124
Using Perceptron method...
0.7627573858549686
0.40045248868778277
Using SGD method...
0.8747417840375588
0.2678375411635565
Using SVC method...
0.8928899503950029
0.26481715006305173
Using Linear SVC method...




0.8676781256001536
0.33041788143828965
7
Using Random Forest method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.854690618762475
0.4061990212071778
Using Logistic Regression method...


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.8377823408624229
0.4216691068814056
Using Gradient Boosting method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.8741898589401449
0.3333333333333333
Using Ada Boost method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.863373214974913
0.3282732447817837
Using Perceptron method...
0.5064275037369208
0.4289173296437219
Using SGD method...
0.8415101958409044
0.3881527669524552
Using SVC method...
0.8807870370370371
0.4125475285171103
Using Linear SVC method...




0.833264887063655
0.4055636896046852
Using Random Forest method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.8538601088490223
0.4313725490196078
Using Logistic Regression method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.8413118761458546
0.4129615674453655
Using Gradient Boosting method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.8815689261233816
0.36788617886178865
Using Ada Boost method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.8686131386861314
0.33592233009708744
Using Perceptron method...
0.8584995251661919
0.23274974253347064
Using SGD method...
0.8250265111346766
0.45759368836291914
Using SVC method...
0.8831369519026464
0.42870632672332387
Using Linear SVC method...




0.7614678899082569
0.4456289978678038
8
Using Random Forest method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.896335849751427
0.30062111801242236
Using Logistic Regression method...


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.8879294161817157
0.3432343234323433
Using Gradient Boosting method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.9187366926898509
0.23666666666666666
Using Ada Boost method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.9112658453847529
0.21732283464566932
Using Perceptron method...
0.8513754205422521
0.36517328825021134
Using SGD method...
0.887271369914147
0.31207289293849655
Using SVC method...
0.9216211390823067
0.3086614173228347
Using Linear SVC method...




0.8976058931860037
0.3101736972704715
Using Random Forest method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.8940873088966661
0.2874845105328377
Using Logistic Regression method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.8915032679738563
0.34052213393870606
Using Gradient Boosting method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.9184472934472935
0.26129032258064516
Using Ada Boost method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.9115186042371373
0.19709208400646203
Using Perceptron method...
0.9099307159353349
0.16474464579901155
Using SGD method...
0.8916201117318435
0.32794457274826794
Using SVC method...
0.918484090096532
0.2897196261682243
Using Linear SVC method...




0.8997419830446002
0.3283950617283951
9
Using Random Forest method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.9694101740746999
0.4326018808777429
Using Logistic Regression method...


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.9590784044016507
0.43333333333333335
Using Gradient Boosting method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.9715631835773179
0.42320819112627983
Using Ada Boost method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.9680349932705249
0.34931506849315064
Using Perceptron method...
0.9339285714285714
0.4182389937106918
Using SGD method...
0.9629756014332025
0.4213333333333333
Using SVC method...
0.9740915208613729
0.4726027397260274
Using Linear SVC method...




0.948191933240612
0.384297520661157
Using Random Forest method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.9692671394799055
0.4203821656050955
Using Logistic Regression method...


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.9636986301369864
0.46464646464646464
Using Gradient Boosting method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.9727456258411844
0.44520547945205485
Using Ada Boost method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.9683395082519367
0.3691275167785235
Using Perceptron method...
0.9351341745157278
0.40065681444991785
Using SGD method...
0.9531087183049669
0.4351464435146444
Using SVC method...
0.9742380872200707
0.4848484848484849
Using Linear SVC method...




0.9489937543372658
0.37711864406779655
10
Using Random Forest method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.7564402810304449
0.47100712105798576
Using Logistic Regression method...


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.739784946236559
0.46903949293027786
Using Gradient Boosting method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.7956531003622417
0.37848347375243035
Using Ada Boost method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.7747942832394977
0.3572311495673672
Using Perceptron method...
0.777710233029382
0.15680245964642583
Using SGD method...
0.7711200508151599
0.28552544613350955
Using SVC method...
0.8012087200518023
0.42545227698066124
Using Linear SVC method...




0.6183491515674431
0.5190286335628851
Using Random Forest method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.7581639643754282
0.4297253634894992
Using Logistic Regression method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.7498839907192576
0.44029075804776735
Using Gradient Boosting method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.7913427190586257
0.3276912660798917
Using Ada Boost method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.7766907698851554
0.3155149934810952
Using Perceptron method...
0.7014593123917883
0.4496124031007752
Using SGD method...
0.7125984251968503
0.4622467771639042
Using SVC method...
0.7964904772095014
0.3915547024952016
Using Linear SVC method...




0.7468936953520477
0.417989417989418
11
Using Random Forest method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.8808526836695851
0.3625254582484725
Using Logistic Regression method...


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.873015873015873
0.38691588785046727
Using Gradient Boosting method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.9039692701664532
0.31729518855656696
Using Ada Boost method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.897117840204305
0.2519893899204244
Using Perceptron method...
0.89025069637883
0.30552291421856637
Using SGD method...
0.8894339622641508
0.37393162393162394
Using SVC method...
0.9086556169429097
0.38461538461538464
Using Linear SVC method...




0.8606395578365574
0.39658119658119656
Using Random Forest method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.8820064601938058
0.3617677286742035
Using Logistic Regression method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.8718743942624539
0.3862581244196843
Using Gradient Boosting method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.9028613085474757
0.2883845126835781
Using Ada Boost method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.8929027549717204
0.22251655629139072
Using Perceptron method...
0.6951539746956316
0.3761602344894968
Using SGD method...
0.8143512724238632
0.3828016643550624
Using SVC method...
0.9071178958984735
0.36795994993742176
Using Linear SVC method...




0.8630917305048125
0.3912663755458515
12
Using Random Forest method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.9990369181380416
0.0
Using Logistic Regression method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.9988762241130198
0.0
Using Gradient Boosting method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.9990369181380416
0.0
Using Ada Boost method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.999197560584176
0.0
Using Perceptron method...
0.9983938323160938
0.0
Using SGD method...


  _warn_prf(average, modifier, msg_start, len(result))


0.9993581514762516
0.0
Using SVC method...


  _warn_prf(average, modifier, msg_start, len(result))


0.9993581514762516
0.0
Using Linear SVC method...




0.998554681226915
0.0
Using Random Forest method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.9990369181380416
0.0
Using Logistic Regression method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.9988762241130198
0.0
Using Gradient Boosting method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.9987154784842646
0.0
Using Ada Boost method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.9988762241130198
0.0
Using Perceptron method...
0.9990369181380416
0.0
Using SGD method...
0.998554681226915
0.0
Using SVC method...


  _warn_prf(average, modifier, msg_start, len(result))


0.9993581514762516
0.0
Using Linear SVC method...




0.9988762241130198
0.0
13
Using Random Forest method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.9119416590701914
0.3568575233022636
Using Logistic Regression method...


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.8985615542686344
0.38505096262740657
Using Gradient Boosting method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.923728813559322
0.24475524475524474
Using Ada Boost method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.9185943846018012
0.1954624781849913
Using Perceptron method...
0.8858001502629602
0.33333333333333337
Using SGD method...
0.8563524995060264
0.38127659574468087
Using SVC method...
0.9285206258890469
0.3431372549019608
Using Linear SVC method...




0.9064273942501374
0.3406451612903226
Using Random Forest method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.9159970781592404
0.39473684210526316
Using Logistic Regression method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.8950996832494876
0.3521288837744534
Using Gradient Boosting method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.9254207263064659
0.2876480541455161
Using Ada Boost method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.9186500888099466
0.24422442244224424
Using Perceptron method...
0.9039444850255661
0.3078947368421053
Using SGD method...
0.8139534883720931
0.36901408450704226
Using SVC method...
0.9292066880113838
0.35179153094462545
Using Linear SVC method...




0.8866174522051864
0.3714585519412381
14
Using Random Forest method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.8556942277691106
0.33212996389891697
Using Logistic Regression method...


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.8468185388845249
0.3181818181818182
Using Gradient Boosting method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.8837038397942312
0.201765447667087
Using Ada Boost method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.8750692009595867
0.17135862913096694
Using Perceptron method...
0.6411167512690354
0.3841463414634146
Using SGD method...
0.8497652582159624
0.3167259786476868
Using SVC method...
0.8849294729027469
0.26886792452830194
Using Linear SVC method...




0.8476716653512233
0.339041095890411
Using Random Forest method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.8610195774374879
0.33426183844011137
Using Logistic Regression method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.8405172413793104
0.28091872791519434
Using Gradient Boosting method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.883293512222018
0.20125786163522014
Using Ada Boost method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.8753452402872398
0.1590062111801242
Using Perceptron method...
0.7637483501979763
0.36449704142011835
Using SGD method...
0.8160990712074303
0.35945363048166784
Using SVC method...
0.8847721378288256
0.2577565632458234
Using Linear SVC method...




0.7758620689655173
0.3483709273182957
15
Using Random Forest method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.9515630367571281
0.3188405797101449
Using Logistic Regression method...


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.9328894340283863
0.275992438563327
Using Gradient Boosting method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.9556082683835988
0.2155688622754491
Using Ada Boost method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.9541595925297114
0.21965317919075142
Using Perceptron method...
0.917053986414015
0.2772585669781931
Using SGD method...
0.9190834228428213
0.30461538461538457
Using SVC method...
0.9590901374978782
0.30144927536231886
Using Linear SVC method...




0.9261862917398946
0.23076923076923075
Using Random Forest method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.9509381993458427
0.3325526932084309
Using Logistic Regression method...


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.9354668542289432
0.331511839708561
Using Gradient Boosting method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.9570074475287744
0.22560975609756098
Using Ada Boost method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.9536692593845113
0.1490683229813665
Using Perceptron method...
0.9369085173501577
0.32075471698113206
Using SGD method...
0.9176131242112854
0.3367198838896952
Using SVC method...
0.9594158600781117
0.31123919308357345
Using Linear SVC method...




0.9175460561616885
0.28527131782945736
16
Using Random Forest method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.4761363636363636
0.794012511170688
Using Logistic Regression method...


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.4905263157894737
0.7767527675276753
Using Gradient Boosting method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.38771331058020475
0.8119891008174387
Using Ada Boost method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.37364043506078054
0.7904986090306013
Using Perceptron method...
0.47770069375619423
0.7501185395922237
Using SGD method...
0.4050785973397823
0.7852466171977301
Using SVC method...
0.4621164683782092
0.8148307824962278
Using Linear SVC method...




0.4430232558139535
0.7878653675819309
Using Random Forest method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.49346219442865275
0.8009828009828011
Using Logistic Regression method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.48891181021144914
0.7693739818478008
Using Gradient Boosting method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.34907597535934287
0.8008376963350785
Using Ada Boost method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.354073123797306
0.7846910412657687
Using Perceptron method...
0.5192442267319803
0.5932504440497336
Using SGD method...
0.32593619972260746
0.7972465581977473
Using SVC method...
0.4688279301745636
0.8160621761658032
Using Linear SVC method...




0.4676258992805755
0.7585081585081584
17
Using Random Forest method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.9801963721085039
0.47577092511013225
Using Logistic Regression method...


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.9772689004882976
0.5454545454545455
Using Gradient Boosting method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.9811886132845015
0.5065502183406114
Using Ada Boost method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.9745989304812834
0.3968253968253968
Using Perceptron method...
0.9790514496396849
0.5353159851301115
Using SGD method...
0.9751731126498902
0.5333333333333332
Using SVC method...
0.9812136325852038
0.48868778280542985
Using Linear SVC method...




0.9699235344095157
0.49572649572649574
Using Random Forest method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.9785536159600997
0.416289592760181
Using Logistic Regression method...


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.9770347855454238
0.5668789808917197
Using Gradient Boosting method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.9790697674418604
0.4166666666666667
Using Ada Boost method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.976162693782297
0.3966244725738397
Using Perceptron method...
0.975206611570248
0.521172638436482
Using SGD method...
0.9676978294308666
0.5090909090909091
Using SVC method...
0.9820657588840915
0.4953271028037383
Using Linear SVC method...




0.9709331973482916
0.5155807365439093
18
Using Random Forest method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.9678402166553826
0.4207317073170732
Using Logistic Regression method...


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.9527613754775964
0.4309623430962343
Using Gradient Boosting method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.9684601113172542
0.39087947882736157
Using Ada Boost method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.9636271358484181
0.3384615384615385
Using Perceptron method...
0.9587203302373581
0.43127962085308064
Using SGD method...
0.9573557115944511
0.3727959697732997
Using SVC method...
0.9708312257629406
0.4327868852459017
Using Linear SVC method...




0.9403299403299402
0.3680297397769517
Using Random Forest method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.9666497375994583
0.40121580547112456
Using Logistic Regression method...


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.9537452537107354
0.39366515837104066
Using Gradient Boosting method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.9691867317730257
0.38383838383838387
Using Ada Boost method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.9647851727042965
0.30564784053156147
Using Perceptron method...
0.9471849398640405
0.39278557114228463
Using SGD method...
0.9578189300411523
0.39108910891089105
Using SVC method...
0.9691243462122491
0.4077669902912621
Using Linear SVC method...




0.9452389257063133
0.3745019920318725
19
Using Random Forest method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.8081580624601657
0.4094179202092871
Using Logistic Regression method...


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.7936714347637625
0.4130702836004932
Using Gradient Boosting method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.8318725099601592
0.3059210526315789
Using Ada Boost method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.8158961881589619
0.30368098159509205
Using Perceptron method...
0.792887029288703
0.32005494505494503
Using SGD method...
0.8186084472556622
0.3340823970037453
Using SVC method...
0.8364881192106324
0.3606299212598425
Using Linear SVC method...




0.7651157563497416
0.41522104085058764
Using Random Forest method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.7981038569273863
0.4125391849529781
Using Logistic Regression method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.7882507672073651
0.4229390681003584
Using Gradient Boosting method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.829375374176811
0.3020408163265306
Using Ada Boost method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.8237424547283702
0.30805687203791465
Using Perceptron method...
0.7135580887703129
0.44107903454803593
Using SGD method...
0.7982905982905983
0.39331619537275064
Using SVC method...
0.83373786407767
0.36377708978328177
Using Linear SVC method...




0.7530438777854354
0.4291024960169942
20
Using Random Forest method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.9160826594788859
0.30402384500745155
Using Logistic Regression method...


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.9076555902331559
0.36248415716096327
Using Gradient Boosting method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.9276476756378889
0.19455252918287938
Using Ada Boost method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.922618003158449
0.1787709497206704
Using Perceptron method...
0.9182412149037612
0.19197207678883071
Using SGD method...
0.8933159560603239
0.3375722543352601
Using SVC method...
0.932467988072268
0.2803738317757009
Using Linear SVC method...




0.8980963045912654
0.378132118451025
Using Random Forest method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.9194739686542965
0.34744525547445254
Using Logistic Regression method...


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.8999815804015472
0.3271375464684015
Using Gradient Boosting method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.9279972037749039
0.19844357976653693
Using Ada Boost method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.9205402560954218
0.15327102803738316
Using Perceptron method...
0.9149812063719348
0.26810477657935283
Using SGD method...
0.912621359223301
0.2789317507418398
Using SVC method...
0.9305750350631137
0.2556390977443609
Using Linear SVC method...




0.8560798903465833
0.3489813994685562
21
Using Random Forest method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.8908378428811345
0.3329532497149373
Using Logistic Regression method...


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.8773728170083522
0.33264462809917356
Using Gradient Boosting method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.906115107913669
0.22781065088757396
Using Ada Boost method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.8998737145949848
0.19913419913419916
Using Perceptron method...
0.6801249699591445
0.35855421686746985
Using SGD method...
0.855855855855856
0.3486725663716814
Using SVC method...
0.9090909090909091
0.28449502133712656
Using Linear SVC method...




0.8970831040176115
0.28535031847133757
Using Random Forest method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.8852704473142429
0.3135498320268757
Using Logistic Regression method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.8725190839694658
0.32931726907630526
Using Gradient Boosting method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.9065805988882912
0.20940819423368742
Using Ada Boost method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.9032955159373313
0.2137628111273792
Using Perceptron method...
0.8814132681826724
0.3103825136612022
Using SGD method...
0.8285714285714285
0.3712574850299401
Using SVC method...
0.9101428313144098
0.29503546099290784
Using Linear SVC method...




0.8908184001477922
0.28189550425273385
22
Using Random Forest method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.9295725904135663
0.10940919037199123
Using Logistic Regression method...


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.9143461402890377
0.13523131672597866
Using Gradient Boosting method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.9497809234917425
0.013245033112582783
Using Ada Boost method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.949232585596222
0.01954397394136808
Using Perceptron method...
0.9359150102810144
0.065
Using SGD method...
0.9357986646122239
0.05063291139240506
Using SVC method...
0.9485579355709226
0.006514657980456026
Using Linear SVC method...




0.9088329194749911
0.14046822742474915
Using Random Forest method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.9294605809128631
0.09734513274336283
Using Logistic Regression method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.9180098107918712
0.11363636363636363
Using Gradient Boosting method...


  _warn_prf(average, modifier, msg_start, len(result))
  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.9494609164420486
0.0
Using Ada Boost method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.9495869162030012
0.019672131147540985
Using Perceptron method...
0.9280887194593659
0.1075268817204301
Using SGD method...
0.879779411764706
0.178391959798995
Using SVC method...
0.9491068419278733
0.0
Using Linear SVC method...




0.9146986253084243
0.1387900355871886
23
Using Random Forest method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.9160801299404441
0.33093525179856115
Using Logistic Regression method...


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.9026548672566372
0.3497536945812808
Using Gradient Boosting method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.9291754756871037
0.2821428571428571
Using Ada Boost method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.9260109482606392
0.268760907504363
Using Perceptron method...
0.7815450643776825
0.3540609137055838
Using SGD method...
0.8895981975215922
0.35384615384615387
Using SVC method...
0.9302489846371181
0.3106457242582897
Using Linear SVC method...




0.893664735563446
0.35706214689265536
Using Random Forest method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.9191043698085951
0.3581661891117479
Using Logistic Regression method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.9032975175991109
0.37708830548926014
Using Gradient Boosting method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.9311926605504588
0.31338028169014087
Using Ada Boost method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.9237273207680112
0.22540250447227192
Using Perceptron method...
0.8849085365853658
0.38866396761133604
Using SGD method...
0.9087604507451835
0.31607629427792916
Using SVC method...
0.9319631467044649
0.35135135135135137
Using Linear SVC method...




0.908029197080292
0.33333333333333337
24
Using Random Forest method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.8443556443556444
0.3671811535337124
Using Logistic Regression method...


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.835965978128797
0.3759630200308166
Using Gradient Boosting method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.8722964077487305
0.2611534276387377
Using Ada Boost method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.8593213877239801
0.2545454545454546
Using Perceptron method...
0.8486816965991594
0.20958083832335328
Using SGD method...
0.8289795918367346
0.3727544910179641
Using SVC method...
0.8748577929465301
0.31392931392931395
Using Linear SVC method...




0.739323220536756
0.42747309072270634
Using Random Forest method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.8443908323281062
0.3866877971473851
Using Logistic Regression method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.8407798537774168
0.40243902439024387
Using Gradient Boosting method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.8699416525503483
0.2513542795232936
Using Ada Boost method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.8631020098596891
0.2494802494802495
Using Perceptron method...
0.8591786986686668
0.16832779623477295
Using SGD method...
0.8342396777442096
0.35247836349331235
Using SVC method...
0.8744292237442922
0.32653061224489793
Using Linear SVC method...




0.8345618345618346
0.40236686390532544
25
Using Random Forest method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.8852336448598132
0.30699774266365687
Using Logistic Regression method...


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.8642352491884666
0.2882882882882883
Using Gradient Boosting method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.900305370935872
0.17040358744394618
Using Ada Boost method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.8943942133815551
0.1728045325779037
Using Perceptron method...
0.842292490118577
0.3214285714285714
Using SGD method...
0.8507899356348743
0.31018935978358875
Using SVC method...
0.9014644729705298
0.2269503546099291
Using Linear SVC method...




0.8824183976261128
0.24881516587677724
Using Random Forest method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.8809031044214488
0.3127035830618893
Using Logistic Regression method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.8698799771385026
0.3080040526849037
Using Gradient Boosting method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.9001436781609194
0.16766467065868262
Using Ada Boost method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.8960028938325195
0.18670438472418668
Using Perceptron method...
0.8631701180568996
0.33863423760523853
Using SGD method...
0.8585799961307797
0.3149015932521087
Using SVC method...
0.9031323556038384
0.24964936886395514
Using Linear SVC method...




0.8784240150093809
0.28476821192052976
26
Using Random Forest method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.9097660601559598
0.42823529411764705
Using Logistic Regression method...


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.8975903614457831
0.41125541125541126
Using Gradient Boosting method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.9190059002324333
0.2954898911353033
Using Ada Boost method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.9085083659665362
0.16828478964401294
Using Perceptron method...
0.7734855136084284
0.38571428571428573
Using SGD method...
0.8902278290340803
0.3697297297297297
Using SVC method...
0.9233267183835469
0.3867243867243867
Using Linear SVC method...




0.8941488354478319
0.41465968586387436
Using Random Forest method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.907277428888068
0.389294403892944
Using Logistic Regression method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.8894348894348896
0.380952380952381
Using Gradient Boosting method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.9170975218399001
0.2583732057416268
Using Ada Boost method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.9087994299964375
0.17684887459807075
Using Perceptron method...
0.897944063715503
0.34169653524492233
Using SGD method...
0.9038773669972948
0.22865412445730826
Using SVC method...
0.9210241615578795
0.3652173913043478
Using Linear SVC method...




0.8845349058398326
0.37997957099080704
27
Using Random Forest method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.8243801652892563
0.3911174785100287
Using Logistic Regression method...


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.8203944607637432
0.417687074829932
Using Gradient Boosting method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.8456066141126707
0.22415458937198066
Using Ada Boost method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.8344706570481576
0.2330623306233062
Using Perceptron method...
0.7816964285714286
0.4430523917995445
Using SGD method...
0.7689463955637708
0.47589098532494756
Using SVC method...
0.8559734944455271
0.3312217194570136
Using Linear SVC method...




0.7967833079765267
0.4281345565749235
Using Random Forest method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.8172535944988539
0.3897007654836465
Using Logistic Regression method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.8109704641350212
0.40106951871657753
Using Gradient Boosting method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.8457672980286046
0.24858757062146894
Using Ada Boost method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.8391142465216539
0.27537511032656664
Using Perceptron method...
0.7250178869544479
0.43563387175721974
Using SGD method...
0.7820683533067022
0.4323699421965318
Using SVC method...
0.8500391542678153
0.32092198581560283
Using Linear SVC method...




0.7981220657276996
0.3896774193548387
28
Using Random Forest method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.5406102277610657
0.7265285239191609
Using Logistic Regression method...


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.5516680227827502
0.7083112758073056
Using Gradient Boosting method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.5116063138347261
0.7422831945124938
Using Ada Boost method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.4785165083672546
0.7135403726708075
Using Perceptron method...
0.30751708428246016
0.7285714285714285
Using SGD method...
0.5875666074600355
0.6606255480853552
Using SVC method...
0.5614973262032086
0.7535070140280561
Using Linear SVC method...




0.599021207177814
0.6124251024913276
Using Random Forest method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.5459940652818992
0.7237554810420427
Using Logistic Regression method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.5447912444264289
0.7020429822234014
Using Gradient Boosting method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.46454069490718713
0.7279322853688028
Using Ada Boost method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.45112107623318387
0.6944583125312032
Using Perceptron method...
0.5933660933660935
0.5557046979865772
Using SGD method...
0.5214827295703454
0.7058518902123253
Using SVC method...
0.5333333333333333
0.7420318725099602
Using Linear SVC method...




0.5811537083482623
0.6606676342525399
29
Using Random Forest method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.9169004207573632
0.10902255639097745
Using Logistic Regression method...


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.9058614564831261
0.1254125412541254
Using Gradient Boosting method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.9394351820347057
0.00558659217877095
Using Ada Boost method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.9394351820347057
0.00558659217877095
Using Perceptron method...
0.8819558474730889
0.14304635761589404
Using SGD method...
0.8629726694122052
0.18120805369127518
Using SVC method...
0.9383304940374788
0.010928961748633878
Using Linear SVC method...




0.8808518450523224
0.1774397972116603
Using Random Forest method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.9194595543077734
0.14525139664804468
Using Logistic Regression method...


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.909541511771995
0.12947189097103917
Using Gradient Boosting method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.9381917248424996
0.0
Using Ada Boost method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.9392547217968351
0.005571030640668524
Using Perceptron method...
0.869063140249581
0.18915801614763553
Using SGD method...
0.9165790396074308
0.1018867924528302
Using SVC method...
0.9379474940334128
0.016216216216216217
Using Linear SVC method...




0.9131810193321618
0.09523809523809525
30
Using Random Forest method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.7542802025560645
0.5122067975107707
Using Logistic Regression method...


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.7287589794401784
0.5020463847203274
Using Gradient Boosting method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.782495101241019
0.39196591600730374
Using Ada Boost method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.7636925795053002
0.373536299765808
Using Perceptron method...
0.7699530516431926
0.30451612903225805
Using SGD method...
0.665929814865985
0.5380206343141001
Using SVC method...
0.7933884297520662
0.47413303013075614
Using Linear SVC method...




0.7529357586921482
0.43317485472794504
Using Random Forest method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.7622478386167146
0.5222007722007722
Using Logistic Regression method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.7362262311067771
0.49297094657919405
Using Gradient Boosting method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.785220397579948
0.3818407960199005
Using Ada Boost method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.7649789962414327
0.3794512551079977
Using Perceptron method...
0.7210321324245377
0.46146616541353386
Using SGD method...
0.6462489463332396
0.5296974224878596
Using SVC method...
0.798671096345515
0.47181871005229514
Using Linear SVC method...




0.7723281216534591
0.32163369495851946
31
Using Random Forest method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.8205128205128205
0.46015424164524427
Using Logistic Regression method...


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.7940533449934413
0.4332129963898917
Using Gradient Boosting method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.8363851151801536
0.28176318063958516
Using Ada Boost method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.8178528347406514
0.28209191759112523
Using Perceptron method...
0.7949718248807977
0.41676942046855736
Using SGD method...
0.4189776144336786
0.463768115942029
Using SVC method...
0.842274462527627
0.3764892772041303
Using Linear SVC method...




0.7712801986904493
0.4394023242944106
Using Random Forest method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.8133645456480102
0.42940793754066364
Using Logistic Regression method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.8008611410118407
0.41860465116279066
Using Gradient Boosting method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.8333988212180746
0.2600349040139616
Using Ada Boost method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.8165046617734576
0.22594142259414227
Using Perceptron method...
0.7817745803357313
0.39296543359611885
Using SGD method...
0.6054421768707483
0.4859675036927621
Using SVC method...
0.8413047828697219
0.35996771589991927
Using Linear SVC method...




0.8192078801559615
0.35363169479090245
32
Using Random Forest method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.8930421909696521
0.3052884615384615
Using Logistic Regression method...


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.8903566710700132
0.3799359658484525
Using Gradient Boosting method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.9140807174887893
0.2753403933434191
Using Ada Boost method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.9087971274685818
0.23723723723723722
Using Perceptron method...
0.8868140243902439
0.39878542510121456
Using SGD method...
0.8725718725718725
0.3970588235294118
Using SVC method...
0.9157060518731989
0.31578947368421056
Using Linear SVC method...




0.8938798427849522
0.3650615901455767
Using Random Forest method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.9005177514792899
0.3502415458937198
Using Logistic Regression method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.8869565217391304
0.36786469344608874
Using Gradient Boosting method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.9139881486801938
0.28185907046476766
Using Ada Boost method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.9106981778820133
0.2857142857142857
Using Perceptron method...
0.871260374445088
0.3677725118483412
Using SGD method...
0.8772130211307824
0.3438453713123093
Using SVC method...
0.9178378378378378
0.33527696793002915
Using Linear SVC method...




0.8264906127501547
0.394528437724982
33
Using Random Forest method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.9506773185133728
0.405857740585774
Using Logistic Regression method...


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.936147570060305
0.39799331103678925
Using Gradient Boosting method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.9577705590699265
0.3617571059431524
Using Ada Boost method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.951340276592112
0.24802110817941952
Using Perceptron method...
0.9058212829069336
0.39667458432304037
Using SGD method...
0.9385593220338982
0.3916083916083916
Using SVC method...
0.9599862613772969
0.4358353510895884
Using Linear SVC method...




0.9329787234042554
0.3657718120805369
Using Random Forest method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.9533321852936112
0.3682983682983683
Using Logistic Regression method...


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.9350833628946434
0.3879598662207358
Using Gradient Boosting method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.9548012962647109
0.28954423592493295
Using Ada Boost method...


  'class0_proba': np.mean(class0_probs) if class0_probs != 'NA' else 'NA',
  'class1_proba': np.mean(class1_probs) if class1_probs != 'NA' else 'NA'


0.9491293956981905
0.21164021164021166
Using Perceptron method...
0.9332628983976052
0.31956912028725315
Using SGD method...
0.9363876651982378
0.35650623885918004
Using SVC method...
0.9569083447332422
0.35051546391752575
Using Linear SVC method...




0.9227722772277228
0.3700440528634361


In [4]:
df.empty

False

In [5]:
df_result = pd.DataFrame.from_dict(result_dict, orient='index')


In [11]:
df_result.to_csv('ZZ-diffmodels.csv', index = False)

In [None]:
df_result

In [7]:
from datetime import datetime

# Get the current time and format it as a string
timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
filename = f'reso_with_kappa_SVM_{timestamp}.csv'

# Save the dataframe without overwriting previous files
df_result.to_csv(filename, index=False)

In [8]:
drug_list_to_exclude = [
    "Amodiaquine",
    "Crizotinib",
    "Mebhydrolin",
    "Harringtonin",
    "Fluphenazine",
    "Fingolimod",
    "Fendiline",
    "Dronedarone",
    "Perphenazine",
    "Pimavanserin",
    "Prochlorperazine",
    "Raloxifene",
    "Abemaciclib",
    "Mefloquine",
    "Revaprazan",
    "Melitracen",
    "Nelfinavir",
    "Nicardipine",
    "Nilotinib",
    "Olmutinib",
    "harringtonin",
    "Terconazole",
    "Thioridazine",
    "Thiothixene",
    "Tilorone-dihydrochloride",
    "Triflupromazine"
]


smiles_dataframe = get_smiles_from_drug_names(drug_list_to_exclude, new_client, return_type='dataframe')
smiles_dataframe

Unnamed: 0,drug_name,smiles
0,Amodiaquine,CCN(CC)Cc1cc(Nc2ccnc3cc(Cl)ccc23)ccc1O
1,Crizotinib,C[C@@H](Oc1cc(-c2cnn(C3CCNCC3)c2)cnc1N)c1c(Cl)...
2,Mebhydrolin,CN1CCc2c(c3ccccc3n2Cc2ccccc2)C1
3,Harringtonin,COC(=O)C[C@@](O)(CCC(C)(C)O)C(=O)O[C@@H]1C(OC)...
4,Fluphenazine,OCCN1CCN(CCCN2c3ccccc3Sc3ccc(C(F)(F)F)cc32)CC1
5,Fingolimod,CCCCCCCCc1ccc(CCC(N)(CO)CO)cc1
6,Fendiline,CC(NCCC(c1ccccc1)c1ccccc1)c1ccccc1
7,Dronedarone,CCCCc1oc2ccc(NS(C)(=O)=O)cc2c1C(=O)c1ccc(OCCCN...
8,Perphenazine,OCCN1CCN(CCCN2c3ccccc3Sc3ccc(Cl)cc32)CC1
9,Pimavanserin,CC(C)COc1ccc(CNC(=O)N(Cc2ccc(F)cc2)C2CCN(C)CC2...


In [9]:
smiles_dataframe['smiles']

0                CCN(CC)Cc1cc(Nc2ccnc3cc(Cl)ccc23)ccc1O
1     C[C@@H](Oc1cc(-c2cnn(C3CCNCC3)c2)cnc1N)c1c(Cl)...
2                       CN1CCc2c(c3ccccc3n2Cc2ccccc2)C1
3     COC(=O)C[C@@](O)(CCC(C)(C)O)C(=O)O[C@@H]1C(OC)...
4        OCCN1CCN(CCCN2c3ccccc3Sc3ccc(C(F)(F)F)cc32)CC1
5                        CCCCCCCCc1ccc(CCC(N)(CO)CO)cc1
6                    CC(NCCC(c1ccccc1)c1ccccc1)c1ccccc1
7     CCCCc1oc2ccc(NS(C)(=O)=O)cc2c1C(=O)c1ccc(OCCCN...
8              OCCN1CCN(CCCN2c3ccccc3Sc3ccc(Cl)cc32)CC1
9     CC(C)COc1ccc(CNC(=O)N(Cc2ccc(F)cc2)C2CCN(C)CC2...
10    CN1CCN(CCCN2c3ccccc3Sc3ccc(Cl)cc32)CC1.O=S(=O)...
11    O=C(c1ccc(OCCN2CCCCC2)cc1)c1c(-c2ccc(O)cc2)sc2...
12    CCN1CCN(Cc2ccc(Nc3ncc(F)c(-c4cc(F)c5nc(C)n(C(C...
13    O[C@@H](c1cc(C(F)(F)F)nc2c(C(F)(F)F)cccc12)[C@...
14            Cc1nc(Nc2ccc(F)cc2)nc(N2CCc3ccccc3C2C)c1C
15                  CN(C)CCC=C1c2ccccc2C(C)(C)c2ccccc21
16    Cc1c(O)cccc1C(=O)N[C@@H](CSc1ccccc1)[C@H](O)CN...
17    COC(=O)C1=C(C)NC(C)=C(C(=O)OCCN(C)Cc2ccccc

In [16]:
for key in smiles_prediction_results:
    print(f"Length of list for key '{key}': {len(smiles_prediction_results[key])}")


Length of list for key 'CCN(CC)Cc1cc(Nc2ccnc3cc(Cl)ccc23)ccc1O_1_RDKFingerprint_True_True': 14
Length of list for key 'C[C@@H](Oc1cc(-c2cnn(C3CCNCC3)c2)cnc1N)c1c(Cl)ccc(F)c1Cl_1_RDKFingerprint_True_True': 14
Length of list for key 'CN1CCc2c(c3ccccc3n2Cc2ccccc2)C1_1_RDKFingerprint_True_True': 14
Length of list for key 'COC(=O)C[C@@](O)(CCC(C)(C)O)C(=O)O[C@@H]1C(OC)=C[C@]23CCCN2CCc2cc4c(cc2[C@@H]13)OCO4_1_RDKFingerprint_True_True': 22
Length of list for key 'OCCN1CCN(CCCN2c3ccccc3Sc3ccc(C(F)(F)F)cc32)CC1_1_RDKFingerprint_True_True': 14
Length of list for key 'CCCCCCCCc1ccc(CCC(N)(CO)CO)cc1_1_RDKFingerprint_True_True': 14
Length of list for key 'CC(NCCC(c1ccccc1)c1ccccc1)c1ccccc1_1_RDKFingerprint_True_True': 14
Length of list for key 'CCCCc1oc2ccc(NS(C)(=O)=O)cc2c1C(=O)c1ccc(OCCCN(CCCC)CCCC)cc1.Cl_1_RDKFingerprint_True_True': 14
Length of list for key 'OCCN1CCN(CCCN2c3ccccc3Sc3ccc(Cl)cc32)CC1_1_RDKFingerprint_True_True': 14
Length of list for key 'CC(C)COc1ccc(CNC(=O)N(Cc2ccc(F)cc2)C2CCN(

In [18]:
padding_value = None
max_length = max(len(lst) for lst in smiles_prediction_results.values())
padded_results = {k: v + [padding_value] * (max_length - len(v)) for k, v in smiles_prediction_results.items()}

In [20]:
df_smiles_prediction_results = pd.DataFrame.from_dict(padded_results, orient='index')  # or padded_results
df_smiles_prediction_results

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,12,13,14,15,16,17,18,19,20,21
CCN(CC)Cc1cc(Nc2ccnc3cc(Cl)ccc23)ccc1O_1_RDKFingerprint_True_True,1,RDKFingerprint,True,True,1,0.480000,0.520000,"[1, RDKFingerprint, True, True, 0, 0.794520807...","[1, RDKFingerprint, True, True, 0, 0.600654019...","[1, RDKFingerprint, True, True, 0, 0.674318326...",...,"[1, RDKFingerprint, True, True, 0, nan, nan]","[1, RDKFingerprint, True, True, 0, nan, nan]",,,,,,,,
C[C@@H](Oc1cc(-c2cnn(C3CCNCC3)c2)cnc1N)c1c(Cl)ccc(F)c1Cl_1_RDKFingerprint_True_True,1,RDKFingerprint,True,True,0,0.805000,0.195000,"[1, RDKFingerprint, True, True, 0, 0.970698517...","[1, RDKFingerprint, True, True, 0, 0.665462615...","[1, RDKFingerprint, True, True, 0, 0.675518301...",...,"[1, RDKFingerprint, True, True, 0, nan, nan]","[1, RDKFingerprint, True, True, 0, nan, nan]",,,,,,,,
CN1CCc2c(c3ccccc3n2Cc2ccccc2)C1_1_RDKFingerprint_True_True,1,RDKFingerprint,True,True,0,0.728500,0.271500,"[1, RDKFingerprint, True, True, 0, 0.760693774...","[1, RDKFingerprint, True, True, 0, 0.701247503...","[1, RDKFingerprint, True, True, 0, 0.506795055...",...,"[1, RDKFingerprint, True, True, 0, nan, nan]","[1, RDKFingerprint, True, True, 0, nan, nan]",,,,,,,,
COC(=O)C[C@@](O)(CCC(C)(C)O)C(=O)O[C@@H]1C(OC)=C[C@]23CCCN2CCc2cc4c(cc2[C@@H]13)OCO4_1_RDKFingerprint_True_True,1,RDKFingerprint,True,True,1,0.360000,0.640000,"[1, RDKFingerprint, True, True, 1, 0.36, 0.64]","[1, RDKFingerprint, True, True, 0, 0.817312424...","[1, RDKFingerprint, True, True, 0, 0.817312424...",...,"[1, RDKFingerprint, True, True, 0, 0.680129707...","[1, RDKFingerprint, True, True, 0, 0.680129707...","[1, RDKFingerprint, True, True, 1, nan, nan]","[1, RDKFingerprint, True, True, 1, nan, nan]","[1, RDKFingerprint, True, True, 0, nan, nan]","[1, RDKFingerprint, True, True, 0, nan, nan]","[1, RDKFingerprint, True, True, 0, nan, nan]","[1, RDKFingerprint, True, True, 0, nan, nan]","[1, RDKFingerprint, True, True, 0, nan, nan]","[1, RDKFingerprint, True, True, 0, nan, nan]"
OCCN1CCN(CCCN2c3ccccc3Sc3ccc(C(F)(F)F)cc32)CC1_1_RDKFingerprint_True_True,1,RDKFingerprint,True,True,0,0.595000,0.405000,"[1, RDKFingerprint, True, True, 0, 0.527573141...","[1, RDKFingerprint, True, True, 0, 0.589433826...","[1, RDKFingerprint, True, True, 0, 0.672241011...",...,"[1, RDKFingerprint, True, True, 0, nan, nan]","[1, RDKFingerprint, True, True, 1, nan, nan]",,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
CC(C)N1CCN(c2ccc(OC[C@H]3CO[C@](Cn4cncn4)(c4ccc(Cl)cc4Cl)O3)cc2)CC1_33_RDKFingerprint_True_False,33,RDKFingerprint,True,False,0,0.860667,0.139333,"[33, RDKFingerprint, True, False, 0, 0.9374983...","[33, RDKFingerprint, True, False, 0, 0.8973594...","[33, RDKFingerprint, True, False, 0, 0.5112737...",...,"[33, RDKFingerprint, True, False, 0, nan, nan]","[33, RDKFingerprint, True, False, 1, nan, nan]",,,,,,,,
CSc1ccc2c(c1)N(CCC1CCCCN1C)c1ccccc1S2_33_RDKFingerprint_True_False,33,RDKFingerprint,True,False,0,0.975000,0.025000,"[33, RDKFingerprint, True, False, 0, 0.9472610...","[33, RDKFingerprint, True, False, 0, 0.9595287...","[33, RDKFingerprint, True, False, 0, 0.5102747...",...,"[33, RDKFingerprint, True, False, 0, nan, nan]","[33, RDKFingerprint, True, False, 0, nan, nan]",,,,,,,,
CN1CCN(CC/C=C2/c3ccccc3Sc3ccc(S(=O)(=O)N(C)C)cc32)CC1_33_RDKFingerprint_True_False,33,RDKFingerprint,True,False,0,0.960000,0.040000,"[33, RDKFingerprint, True, False, 0, 0.9699140...","[33, RDKFingerprint, True, False, 0, 0.9268728...","[33, RDKFingerprint, True, False, 0, 0.5120498...",...,"[33, RDKFingerprint, True, False, 0, nan, nan]","[33, RDKFingerprint, True, False, 0, nan, nan]",,,,,,,,
CCN(CC)CCOc1ccc2c(c1)C(=O)c1cc(OCCN(CC)CC)ccc1-2_33_RDKFingerprint_True_False,33,RDKFingerprint,True,False,0,0.809167,0.190833,"[33, RDKFingerprint, True, False, 0, 0.9223970...","[33, RDKFingerprint, True, False, 0, 0.8861371...","[33, RDKFingerprint, True, False, 0, 0.5081524...",...,"[33, RDKFingerprint, True, False, 0, nan, nan]","[33, RDKFingerprint, True, False, 0, nan, nan]",,,,,,,,


In [15]:
df_smiles_prediction_results = pd.DataFrame.from_dict(smiles_prediction_results)

ValueError: All arrays must be of the same length

In [13]:
df_smiles_prediction_results.to_csv('ZZ-smiles_prediction_diffmodel_results.csv', index=False)

NameError: name 'df_smiles_prediction_results' is not defined

[1,
 'RDKFingerprint',
 True,
 True,
 1,
 0.36,
 0.64,
 [1, 'RDKFingerprint', True, True, 1, 0.36, 0.64],
 [1, 'RDKFingerprint', True, True, 0, 0.8173124243691264, 0.1826875756308736],
 [1, 'RDKFingerprint', True, True, 0, 0.8173124243691264, 0.1826875756308736],
 [1, 'RDKFingerprint', True, True, 0, 0.7533262564060454, 0.24667374359395455],
 [1, 'RDKFingerprint', True, True, 0, 0.7533262564060454, 0.24667374359395455],
 [1, 'RDKFingerprint', True, True, 0, 0.6801297071122772, 0.3198702928877229],
 [1, 'RDKFingerprint', True, True, 0, 0.6801297071122772, 0.3198702928877229],
 [1, 'RDKFingerprint', True, True, 1, nan, nan],
 [1, 'RDKFingerprint', True, True, 1, nan, nan],
 [1, 'RDKFingerprint', True, True, 0, nan, nan],
 [1, 'RDKFingerprint', True, True, 0, nan, nan],
 [1, 'RDKFingerprint', True, True, 0, nan, nan],
 [1, 'RDKFingerprint', True, True, 0, nan, nan],
 [1, 'RDKFingerprint', True, True, 0, nan, nan],
 [1, 'RDKFingerprint', True, True, 0, nan, nan]]

In [23]:
test1 = {'CCN(CC)Cc1cc(Nc2ccnc3cc(Cl)ccc23)ccc1O_1_RDKFingerprint_True_True': [1,
  'RDKFingerprint',
  True,
  True,
  1,
  0.48,
  0.52,
  [1,
   'RDKFingerprint',
   True,
   True,
   0,
   0.7945208079181986,
   0.20547919208180143],
  [1,
   'RDKFingerprint',
   True,
   True,
   0,
   0.6006540195113268,
   0.39934598048867315],
  [1, 'RDKFingerprint', True, True, 0, 0.674318326476439, 0.32568167352356103],
  [1, 'RDKFingerprint', True, True, 0, 'nan', 'nan'],
  [1, 'RDKFingerprint', True, True, 0, 'nan', 'nan'],
  [1, 'RDKFingerprint', True, True, 0, 'nan', 'nan'],
  [1, 'RDKFingerprint', True, True, 0, 'nan', 'nan']]}

In [25]:
len(test1['CCN(CC)Cc1cc(Nc2ccnc3cc(Cl)ccc23)ccc1O_1_RDKFingerprint_True_True'])

14

In [26]:
key = 'CCN(CC)Cc1cc(Nc2ccnc3cc(Cl)ccc23)ccc1O_1_RDKFingerprint_True_True'
first_seven_elements = test1[key][:7]  # This slices the list to get the first seven elements

# first_seven_elements now contains the first seven elements of the list
print(first_seven_elements)


[1, 'RDKFingerprint', True, True, 1, 0.48, 0.52]


In [27]:
test1

{'CCN(CC)Cc1cc(Nc2ccnc3cc(Cl)ccc23)ccc1O_1_RDKFingerprint_True_True': [1,
  'RDKFingerprint',
  True,
  True,
  1,
  0.48,
  0.52,
  [1,
   'RDKFingerprint',
   True,
   True,
   0,
   0.7945208079181986,
   0.20547919208180143],
  [1,
   'RDKFingerprint',
   True,
   True,
   0,
   0.6006540195113268,
   0.39934598048867315],
  [1, 'RDKFingerprint', True, True, 0, 0.674318326476439, 0.32568167352356103],
  [1, 'RDKFingerprint', True, True, 0, 'nan', 'nan'],
  [1, 'RDKFingerprint', True, True, 0, 'nan', 'nan'],
  [1, 'RDKFingerprint', True, True, 0, 'nan', 'nan'],
  [1, 'RDKFingerprint', True, True, 0, 'nan', 'nan']]}