In [2]:
import os
import pandas 
import numpy 
from sklearn.ensemble import RandomForestClassifier
from sklearn.multioutput import MultiOutputClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.utils import shuffle
from sklearn.datasets import make_friedman1
from sklearn.svm import SVR
from sklearn.cluster import KMeans
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import RadiusNeighborsClassifier

#Grid Search
from sklearn.experimental import enable_halving_search_cv  # noqa
from sklearn.model_selection import HalvingGridSearchCV
from sklearn.model_selection import GridSearchCV

#Extra Trees Classifier
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.datasets import make_classification

#one-hot encoding
from sklearn.preprocessing import LabelBinarizer

#gradient boosting Classifier
from sklearn.ensemble import GradientBoostingClassifier

from sklearn.linear_model import LogisticRegression

#radius neighbor classifier
from sklearn.model_selection import RepeatedStratifiedKFold
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import MinMaxScaler

import warnings  # To ignore warnings

def warn(*args, **kwargs):
    """No-op replacement for ``warnings.warn``.

    Accepts any positional/keyword arguments, ignores them and returns None.
    """
    return None


# Rebind ``warnings.warn`` to the no-op above so that code calling
# ``warnings.warn(...)`` through the module attribute emits nothing.
# NOTE(review): code that bound the original function earlier (e.g. via
# ``from warnings import warn``) is presumably unaffected -- confirm if needed.
warnings.warn = warn

## 1. Random Forest Multioutput Classifier

In [None]:
def RFMultiOutputClassifier(features, labels_n, validation, 
                            validationLabels_n, test):
    """Fit a multi-output random forest and predict the test/validation sets.

    A ``RandomForestClassifier(random_state=1)`` is wrapped in a
    ``MultiOutputClassifier`` (``n_jobs=-1``) and fitted on ``features`` /
    ``labels_n``. The score of the fitted model on the validation split is
    printed.

    Parameters
    ----------
    features : training features passed to ``fit``.
    labels_n : training targets passed to ``fit``.
    validation : validation features (scored and predicted).
    validationLabels_n : validation targets used for the printed score.
    test : test features to predict.

    Returns
    -------
    tuple
        ``(predicted_labels, predicted_labelsV)``: the predictions for
        ``test`` and for ``validation``, in that order.
    """
    # The former ``n_samples, n_features = features.shape`` and ``model``
    # locals were never used, so they are gone; inputs no longer need to
    # expose a 2-D ``.shape`` before reaching the estimator.
    multi_target_forest = MultiOutputClassifier(
        RandomForestClassifier(random_state=1), n_jobs=-1)
    multi_target_forest.fit(features, labels_n)

    print('Validation Score: ',
          multi_target_forest.score(validation, validationLabels_n))

    predicted_labels = multi_target_forest.predict(test)
    predicted_labelsV = multi_target_forest.predict(validation)

    return predicted_labels, predicted_labelsV

## 2. Random Forest Classifier Each Target

In [None]:
def RandomForestClassifierEachTarget(updatedFeatures , updatedValidation, 
                                     updatedTest, maxDepth, validationLabels_n, 
                                     labels_n, targetVal, noEst):
    """Train a random forest on a single target column and predict with it.

    Parameters
    ----------
    updatedFeatures, updatedValidation, updatedTest :
        feature matrices for the training, validation and test splits.
    maxDepth : forwarded as ``max_depth`` to the forest.
    validationLabels_n, labels_n :
        label arrays indexed as ``[:, targetVal]``.
    targetVal : column index of the target to predict.
    noEst : forwarded as ``n_estimators`` to the forest.

    Returns
    -------
    tuple
        Predictions for ``updatedTest`` and for ``updatedValidation``.
    """
    # targetVal denotes the target which we wish to predict,
    # i.e. Hdl_cholesterol_human, Cholesterol_ldl_human, Hemoglobin(hgb)_human
    train_target = labels_n[:, targetVal]

    forest = RandomForestClassifier(random_state=0,
                                    max_depth=maxDepth,
                                    n_estimators=noEst)
    fitted = forest.fit(updatedFeatures, train_target)
    print(fitted)

    # The validation labels are handed to ``score`` as a single column.
    val_target = validationLabels_n[:, targetVal].reshape(-1, 1)
    val_score = forest.score(updatedValidation, val_target)

    print("Score: ", val_score)

    test_pred = forest.predict(updatedTest)
    val_pred = forest.predict(updatedValidation)
    return test_pred, val_pred

## 3. Random Forest Classifier Each Target with grid search

In [None]:
def RandomForestClassifierEachTargetGS(updatedFeatures , updatedValidation, 
                                       updatedTest, maxDepth, 
                                       validationLabels_n, labels_n, targetVal, 
                                       noEst):
    """Tune a random forest for one target with a successive-halving search.

    The halving search uses ``n_estimators`` as its resource (capped at 30)
    and explores ``max_depth``, ``min_samples_split`` and ``criterion``.

    Parameters
    ----------
    updatedFeatures, updatedValidation, updatedTest :
        feature matrices for the training, validation and test splits.
    maxDepth, noEst : not used by this function.
    validationLabels_n, labels_n :
        label arrays indexed as ``[:, targetVal]``.
    targetVal : column index of the target to predict.

    Returns
    -------
    tuple
        Predictions for ``updatedTest`` and for ``updatedValidation``.
    """
    # targetVal denotes the target which we wish to predict,
    # i.e. Hdl_cholesterol_human, Cholesterol_ldl_human, Hemoglobin(hgb)_human
    search_space = {
        'max_depth': [4, 6, 8, 10, 12],
        'min_samples_split': [2, 5, 10, 15, 20, 25, 30],
        'criterion': ['gini', 'entropy'],
    }
    halving_options = dict(cv=5,
                           factor=2,
                           resource='n_estimators',
                           max_resources=30)

    forest = RandomForestClassifier(random_state=0)
    train_target = labels_n[:, targetVal]

    unfitted = HalvingGridSearchCV(forest, search_space, **halving_options)
    search = unfitted.fit(updatedFeatures, train_target)

    print('Best Estimator :', search.best_estimator_)

    # The validation labels are handed to ``score`` as a single column.
    val_target = validationLabels_n[:, targetVal].reshape(-1, 1)
    val_score = search.score(updatedValidation, val_target)

    print("Score: ", val_score)

    test_pred = search.predict(updatedTest)
    val_pred = search.predict(updatedValidation)
    return test_pred, val_pred

## 4. Extra Trees Classifier Each Target with Grid Search

In [None]:
def ExtraTreesClassifierEachTargetGS(updatedFeatures , updatedValidation, 
                                     updatedTest, maxDepth, validationLabels_n,
                                     labels_n, targetVal, noEst):
    """Tune an extra-trees classifier for one target with a halving search.

    The search explores ``n_estimators`` and ``criterion`` using the number
    of training samples as the halving resource.

    Parameters
    ----------
    updatedFeatures, updatedValidation, updatedTest :
        feature matrices for the training, validation and test splits.
    maxDepth, noEst : not used by this function (kept so existing callers
        keep working).
    validationLabels_n, labels_n :
        label arrays indexed as ``[:, targetVal]``.
    targetVal : column index of the target to predict.

    Returns
    -------
    tuple
        Predictions for ``updatedTest`` and for ``updatedValidation``.
    """
    # targetVal denotes the target which we wish to predict,
    # i.e. Hdl_cholesterol_human, Cholesterol_ldl_human, Hemoglobin(hgb)_human
    y = labels_n[:, targetVal]
    param_grid = {'n_estimators':[4,6,8,10,12,14,15,16,18,20,25,30,40,50,60,70],
                  'criterion':['gini', 'entropy']}

    base_estimator = ExtraTreesClassifier(random_state = 0)
    # The halving resource here is the default 'n_samples' (``n_estimators``
    # is part of the grid, so it cannot be the resource). The previous
    # ``max_resources = 30`` therefore limited the whole search to at most 30
    # training rows; 'auto' lets the last iteration use the full training set.
    # ``random_state`` makes the per-iteration row subsampling reproducible.
    sh = HalvingGridSearchCV(base_estimator, 
                             param_grid, 
                             cv = 5,
                             factor = 2, 
                             max_resources = 'auto',
                             random_state = 0).fit(updatedFeatures, y)

    print('Best Estimator :', sh.best_estimator_)

    validationLabels = validationLabels_n[:, targetVal]
    score = sh.score(updatedValidation, validationLabels)

    print("Score: ", score)

    predicted_labels = sh.predict(updatedTest)
    predicted_labelsV = sh.predict(updatedValidation)

    return predicted_labels, predicted_labelsV

### 5. Gradient Boosting Classifier

In [7]:
def GradientBoostingClassifierEachTargetGS(updatedFeatures , updatedValidation, 
                                           updatedTest, maxDepth, 
                                           validationLabels_n, labels_n, 
                                           targetVal, noEst):
    """Tune a gradient-boosting classifier for one target with a halving search.

    The search explores ``n_estimators``, ``learning_rate``, ``max_depth``
    and ``criterion`` using the number of training samples as the halving
    resource.

    Parameters
    ----------
    updatedFeatures, updatedValidation, updatedTest :
        feature matrices for the training, validation and test splits.
    maxDepth, noEst : not used by this function (kept so existing callers
        keep working).
    validationLabels_n, labels_n :
        label arrays indexed as ``[:, targetVal]``.
    targetVal : column index of the target to predict.

    Returns
    -------
    tuple
        Predictions for ``updatedTest`` and for ``updatedValidation``.
    """
    # targetVal denotes the target which we wish to predict,
    # i.e. Hdl_cholesterol_human, Cholesterol_ldl_human, Hemoglobin(hgb)_human
    y = labels_n[:, targetVal]
    # 'mae' is no longer an accepted ``criterion`` for gradient boosting
    # (deprecated in scikit-learn 0.24, removed in 1.1), so every candidate
    # using it failed to fit. 'squared_error' (scikit-learn >= 1.0) is the
    # supported alternative to 'friedman_mse'.
    param_grid = {'n_estimators':[4, 6, 8, 10, 12, 15, 16, 18, 20, 50, 100],
                  'learning_rate':[0.0001, 0.001, 0.01, 0.1, 1, 10, 100],
                  'max_depth':[3, 4, 6, 8, 10, 12, 14, 16, 18, 20],
                  'criterion':['squared_error', 'friedman_mse']}

    base_estimator = GradientBoostingClassifier(random_state = 0)
    # The halving resource is the default 'n_samples'; the previous
    # ``max_resources = 30`` capped the search at 30 training rows. 'auto'
    # allows the final iteration to use the full training set, and
    # ``random_state`` makes the row subsampling reproducible.
    sh = HalvingGridSearchCV(base_estimator, 
                             param_grid, 
                             cv = 5,
                             factor = 2, 
                             max_resources = 'auto',
                             random_state = 0).fit(updatedFeatures, y)
    print('Best Estimator :', sh.best_estimator_)

    validationLabels = validationLabels_n[:, targetVal]
    score = sh.score(updatedValidation, validationLabels)

    print("Score: ", score)

    predicted_labels = sh.predict(updatedTest)
    predicted_labelsV = sh.predict(updatedValidation)

    return predicted_labels, predicted_labelsV

### 6. Logistic Regression 

In [1]:
def logisticRegressionEachTargetGS(updatedFeatures , updatedValidation, 
                                     updatedTest, maxDepth, validationLabels_n,
                                     labels_n, targetVal, noEst):
    """Fit a logistic regression (``saga`` solver) on one target and predict.

    Despite the ``GS`` suffix no hyper-parameter search is performed here; a
    single ``LogisticRegression(random_state=0, solver='saga')`` is fitted.

    Parameters
    ----------
    updatedFeatures, updatedValidation, updatedTest :
        feature matrices for the training, validation and test splits.
    maxDepth, noEst : not used by this function (kept so existing callers
        keep working).
    validationLabels_n, labels_n :
        label arrays indexed as ``[:, targetVal]``.
    targetVal : column index of the target to predict.

    Returns
    -------
    tuple
        Predictions for ``updatedTest`` and for ``updatedValidation``.
    """
    # targetVal denotes the target which we wish to predict,
    # i.e. Hdl_cholesterol_human, Cholesterol_ldl_human, Hemoglobin(hgb)_human
    y = labels_n[:, targetVal]

    clf = LogisticRegression(random_state = 0, 
                             solver = 'saga').fit(updatedFeatures, y)
    # Call the method: printing ``clf.get_params`` without parentheses only
    # showed the bound-method repr, not the parameters.
    print(clf.get_params())

    validationLabels = validationLabels_n[:, targetVal]
    score = clf.score(updatedValidation, validationLabels)

    print("Score: ", score)

    predicted_labels = clf.predict(updatedTest)
    predicted_labelsV = clf.predict(updatedValidation)

    return predicted_labels, predicted_labelsV

### 7. CatBoost

In [1]:
def CatBoostEachTargetGS(updatedFeatures , updatedValidation, 
                                     updatedTest, maxDepth, validationLabels_n,
                                     labels_n, targetVal, noEst):
    """Tune a CatBoost classifier for one target with a randomized search.

    Ten parameter combinations are sampled from the grid below and evaluated
    with 2-fold cross-validation.

    Parameters
    ----------
    updatedFeatures, updatedValidation, updatedTest :
        feature matrices for the training, validation and test splits.
    maxDepth, noEst : not used by this function (kept so existing callers
        keep working).
    validationLabels_n, labels_n :
        label arrays indexed as ``[:, targetVal]``.
    targetVal : column index of the target to predict.

    Returns
    -------
    tuple
        Predictions for ``updatedTest`` and for ``updatedValidation``.
    """
    # Imported here because neither name is brought into scope by the
    # notebook's import cell; without these the function raised NameError.
    from catboost import CatBoostClassifier
    from sklearn.model_selection import RandomizedSearchCV

    # targetVal denotes the target which we wish to predict,
    # i.e. Hdl_cholesterol_human, Cholesterol_ldl_human, Hemoglobin(hgb)_human
    y = labels_n[:, targetVal]

    model = CatBoostClassifier()
    parameters = {'depth':[3, 1, 2, 6, 4, 5, 7, 8, 9, 10],
                  'iterations':[250, 100, 500,1000],
                  'learning_rate':[0.03, 0.001, 0.01, 0.1, 0.2, 0.3], 
                  'l2_leaf_reg':[3, 1, 5, 10, 100],
                  'border_count':[32, 5, 10, 20, 50, 100, 200]
                  }

    # ``random_state`` fixes which candidates are sampled so that repeated
    # runs of the notebook evaluate the same combinations.
    randm = RandomizedSearchCV(estimator=model, param_distributions = parameters, 
                               cv = 2, n_iter = 10, n_jobs=-1,
                               random_state = 0)
    randm.fit(updatedFeatures, y)

    # Call the method: printing ``randm.get_params`` without parentheses only
    # showed the bound-method repr.
    print(randm.get_params())

    validationLabels = validationLabels_n[:, targetVal]
    score = randm.score(updatedValidation, validationLabels)

    print("Score: ", score)

    predicted_labels = randm.predict(updatedTest)
    predicted_labelsV = randm.predict(updatedValidation)

    return predicted_labels, predicted_labelsV

### 8. Radius Neighbors Classifier

In [None]:
def RadiusNeighborsClassifierEachTargetGS(updatedFeatures , updatedValidation, 
                                     updatedTest, validationLabels_n,
                                     labels_n, targetVal):
    """Tune a radius-neighbors classifier for one target with a grid search.

    A ``MinMaxScaler`` + ``RadiusNeighborsClassifier`` pipeline is tuned over
    the neighbor radius with repeated stratified 10-fold cross-validation
    (3 repeats, ``f1_weighted`` scoring). The refit best pipeline is then
    used to score the validation split and to produce the predictions.

    Parameters
    ----------
    updatedFeatures, updatedValidation, updatedTest :
        feature matrices for the training, validation and test splits.
    validationLabels_n, labels_n :
        label arrays indexed as ``[:, targetVal]``.
    targetVal : column index of the target to predict.

    Returns
    -------
    tuple
        Predictions for ``updatedTest`` and for ``updatedValidation``.
    """
    # targetVal denotes the target which we wish to predict,
    # i.e. Hdl_cholesterol_human, Cholesterol_ldl_human, Hemoglobin(hgb)_human
    y = labels_n[:, targetVal]

    model = RadiusNeighborsClassifier()
    pipeline = Pipeline(steps = [('norm', MinMaxScaler()),('model',model)])
    cv = RepeatedStratifiedKFold(n_splits = 10, 
                                 n_repeats = 3, 
                                 random_state = 1)
    grid = dict()
    # ``numpy.arange(0.8, 1.2, 1.6)`` (start, stop, step) produced the single
    # value 0.8, so nothing was actually searched. The three numbers are
    # taken to be the candidate radii -- NOTE(review): confirm the intended
    # range.
    grid['model__radius'] = numpy.array([0.8, 1.2, 1.6])
    search = GridSearchCV(pipeline, grid, scoring = 'f1_weighted', 
                          cv = cv, 
                          n_jobs = -1)
    results = search.fit(updatedFeatures, y)

    # summarize
    print('Best Parameters: %s' % results.best_params_)

    # Reuse the pipeline refit by the search. Building a bare
    # ``RadiusNeighborsClassifier(**best_params_)`` fails because the keys are
    # pipeline-prefixed ('model__radius'), and it would also drop the scaling
    # step the radius was tuned with.
    neigh = results.best_estimator_
    print(neigh)

    validationLabels = validationLabels_n[:, targetVal]
    # Scoring through the pipeline (not ``search.score``) uses the
    # classifier's own ``score`` method rather than the search's
    # ``f1_weighted`` scorer.
    score = neigh.score(updatedValidation, validationLabels)

    print("Score: ", score)

    predicted_labels = neigh.predict(updatedTest)
    predicted_labelsV = neigh.predict(updatedValidation)

    return predicted_labels, predicted_labelsV