In [3]:
import pandas as pd
from striprtf.striprtf import rtf_to_text
import json
import pandas as pd
from sklearn.model_selection import GridSearchCV
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import RobustScaler
from sklearn.feature_selection import SelectFromModel
from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier, GradientBoostingClassifier, GradientBoostingRegressor
from sklearn.linear_model import LinearRegression, LogisticRegression, Ridge, ElasticNet, Lasso
from sklearn.decomposition import PCA
from xgboost import XGBRegressor, XGBClassifier
from sklearn.metrics import make_scorer, f1_score
from sklearn.pipeline import Pipeline

In [4]:
with open('algoparams_from_ui.json.rtf', 'r') as f: 
    rtfText = f.read() 
plainText = rtf_to_text(rtfText)
mainDict = json.loads(plainText)

In [5]:
session_info = mainDict['design_state_data']['session_info']
target = mainDict['design_state_data']['target']
train = mainDict['design_state_data']['train']
metrics = mainDict['design_state_data']['metrics']
feature_handling = mainDict['design_state_data']['feature_handling']
feature_generation = mainDict['design_state_data']['feature_generation']
feature_reduction = mainDict['design_state_data']['feature_reduction']
hyperparameters = mainDict['design_state_data']['hyperparameters']
weighting_stratergy = mainDict['design_state_data']['weighting_stratergy']
probability_calibration = mainDict['design_state_data']['probability_calibration']
algorithms = mainDict['design_state_data']['algorithms']

In [29]:
algorithms['RandomForestClassifier']['is_selected'] = True
algorithms['GBTRegressor']['is_selected'] = True

In [30]:
def getTarget(targetDict):
    target = targetDict['target']
    regtype = targetDict['type']
    return target, regtype

In [31]:
def lossGBT(algoDict, typ):
    if typ == 'reg':
        if algoDict['GBTClassifier']['use_deviance'] == True:
            return 'deviance'
        else:
            return 'exponential'
    elif typ == 'clf':
        if algoDict['GBTRegressor']['use_deviance'] == True:
            return 'deviance'
        else:
            return 'exponential' 

In [32]:
def transformStrToModelObj(modelStr, algoDict, regtype):
    if modelStr == 'RandomForestClassifier':
        return RandomForestClassifier(
            n_estimators = algoDict[modelStr]['max_trees'],
            max_depth = algoDict[modelStr]['max_depth'],
            min_samples_split = algoDict[modelStr]['min_samples_per_leaf_min_value']
        )
    elif modelStr == 'RandomForestRegressor':
        return RandomForestRegressor(
            n_estimators = algoDict[modelStr]['max_trees'],
            max_depth = algoDict[modelStr]['max_depth'],
            min_samples_split = algoDict[modelStr]['min_samples_per_leaf_min_value']
        )
    elif modelStr == 'GBTClassifier':
        return GradientBoostingClassifier(
            subsample = algoDict[modelStr]['min_subsample'],
            max_depth = algoDict[modelStr]['max_depth'],
            loss = lossGBT(algoDict, 'clf')
        )
    elif modelStr == 'GBTRegressor':
        return GradientBoostingRegressor(
            subsample = algoDict[modelStr]['min_subsample'],
            max_depth = algoDict[modelStr]['max_depth'],
            loss = lossGBT(algoDict, 'reg')
        )
    elif modelStr == 'LinearRegression':
        return LinearRegression()
    
    elif modelStr == 'LogisticRegression':
        return LogisticRegression(
            max_iter = algoDict[modelStr]['max_iter'],
            penalty='elasticnet',
            l1_ratio=algoDict[modelStr]['max_elasticnet']
        )
    elif modelStr == 'RidgeRegression':
        if type(algoDict[modelStr]['regularization_term']) in [int, float]:
            return Ridge(
                alpha = algoDict[modelStr]['regularization_term'],
                max_iter = algoDict[modelStr]['max_iter']
            )
        else:
            return Ridge(
                max_iter = algoDict[modelStr]['max_iter']
            )
    elif modelStr == 'LassoRegression':
        if type(algoDict[modelStr]['regularization_term']) in [int, float]:
            return Lasso(
                alpha = algoDict[modelStr]['regularization_term'],
                max_iter = algoDict[modelStr]['max_iter']
            )
        else:
            return Lasso(
                max_iter = algoDict[modelStr]['max_iter']
            )
    elif modelStr == 'ElasticNetRegression':
        if type(algoDict[modelStr]['regularization_term']) in [int, float]:
            return ElasticNet(
                alpha = algoDict[modelStr]['regularization_term'],
                max_iter = algoDict[modelStr]['max_iter']
            )
        else:
            return ElasticNet(
                max_iter = algoDict[modelStr]['max_iter']
            )
    elif modelStr == 'xgboost':
        if regtype == 'regression':
            if algoDict[modelStr]['dart'] == True:
                return XGBRegressor(
                    max_depth = algoDict[modelStr]['max_depth_of_tree'][0],
                    learning_rate = algoDict[modelStr]['learningRate'],
                    booster = 'dart',
                    gamma = algoDict[modelStr]['gamma'][0],
                    min_child_weight = algoDict[modelStr]['min_child_weight'][0],
                    reg_alpha = algoDict[modelStr]['l1_regularization'],
                    reg_lambda = algoDict[modelStr]['l2_regularization'],
                    random_state = algoDict[modelStr]['random_state'],
                    early_stopping_rounds = algoDict[modelStr]['early_stopping_rounds']
                    
                )
            else:
                return XGBRegressor(
                    max_depth = algoDict[modelStr]['max_depth_of_tree'][0],
                    learning_rate = algoDict[modelStr]['learningRate'],
                    gamma = algoDict[modelStr]['gamma'][0],
                    min_child_weight = algoDict[modelStr]['min_child_weight'][0],
                    reg_alpha = algoDict[modelStr]['l1_regularization'],
                    reg_lambda = algoDict[modelStr]['l2_regularization'],
                    random_state = algoDict[modelStr]['random_state'],
                    early_stopping_rounds = algoDict[modelStr]['early_stopping_rounds']
                    
                )
        elif regtype == 'classification':
            if algoDict[modelStr]['dart'] == True:
                return XGBClassifier(
                    max_depth = algoDict[modelStr]['max_depth_of_tree'][0],
                    learning_rate = algoDict[modelStr]['learningRate'],
                    booster = 'dart',
                    gamma = algoDict[modelStr]['gamma'][0],
                    min_child_weight = algoDict[modelStr]['min_child_weight'][0],
                    reg_alpha = algoDict[modelStr]['l1_regularization'],
                    reg_lambda = algoDict[modelStr]['l2_regularization'],
                    random_state = algoDict[modelStr]['random_state'],
                    early_stopping_rounds = algoDict[modelStr]['early_stopping_rounds']
                    
                )
            else:
                return XGBClassifier(
                    max_depth = algoDict[modelStr]['max_depth_of_tree'][0],
                    learning_rate = algoDict[modelStr]['learningRate'],
                    gamma = algoDict[modelStr]['gamma'][0],
                    min_child_weight = algoDict[modelStr]['min_child_weight'][0],
                    reg_alpha = algoDict[modelStr]['l1_regularization'],
                    reg_lambda = algoDict[modelStr]['l2_regularization'],
                    random_state = algoDict[modelStr]['random_state'],
                    early_stopping_rounds = algoDict[modelStr]['early_stopping_rounds']
                    
                )

In [33]:
def getAlgorithm(algoDict, targetDict):
    _, regtype = getTarget(targetDict)
    if regtype.lower() == 'regression':
        possibleModels = ['RandomForestRegressor', 'GBTRegressor', 'LinearRegression',
                          'RidgeRegression', 'LassoRegression', 'ElasticNetRegression',
                          'xg_boost', 'DecisionTreeRegressor', 'neural_network',
                          'SVM', 'SGD', 'KNN']
        selectedModels = []
        for models in possibleModels:
            if algoDict[models]['is_selected'] == True:
                selectedModels.append(models)
        modelsObjDict = {}
        for models in selectedModels:
            modelsObjDict[models] = transformStrToModelObj(models, algorithms, regtype)
        return modelsObjDict
        

    elif regtype.lower() == 'classification':
        possibleModels = ['RandomForestClassifier', 'GBTClassifier', 'LogisticRegression',
                          'xg_boost', 'DecisionTreeClassifier', 'neural_network',
                          'SVM', 'SGD', 'KNN']
        selectedModels = []
        for models in possibleModels:
            if algoDict[models]['is_selected'] == True:
                selectedModels.append(models)
        for i in selectedModels:
            modelsObjDict[i] = transformStrToModelObj(models, algorithms, regtype)
        return modelsObjDict

In [34]:
getAlgorithm(algorithms, target)

{'RandomForestRegressor': RandomForestRegressor(max_depth=25, min_samples_split=5, n_estimators=20),
 'GBTRegressor': GradientBoostingRegressor(loss='deviance', max_depth=7, subsample=1)}