In [None]:
import itertools
import math
from hyperopt import hp
import xgboost as xgb
import lightgbm as lgb
from IPython.display import Image
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pickle
import re
from sklearn.model_selection import train_test_split

import warnings
warnings.filterwarnings("ignore")

from lmf import LoadModelFunction
modeling = LoadModelFunction()

from utils import mpr_report, mpr_report_en, final_fitting
from utils2 import calc_iv, get_redundant_pairs, get_top_abs_correlations, roc_curve_graph,\
                    performance_table, plot_ks_graph, plot_deciles_performance, table_summary,\
                    remove_highcorr_vars, auc_by_groups, model_performance_metrics



In [None]:
# Load the modelling dataset (placeholder path).
df = pd.read_csv("..")

target = "target"
predictive_cols = [" "]  # all features
features = predictive_cols  # keep the full feature list; `predictive_cols` is reused below

# Feature-importance table: keep only the rows whose importance exceeds 0.02.
predictive_cols = pd.read_csv("feature importance")
predictive_cols.columns = ['0', 'importance']
is_important = predictive_cols['importance'] > 0.02
predictive_cols = predictive_cols[is_important]

# Row masks on X_fold: 'train' and 'valid' rows form the training sample,
# 'test' rows form the hold-out sample.
in_train = df['X_fold'].isin(['train', 'valid'])
in_test = df['X_fold'] == 'test'

train_x, train_y = df.loc[in_train, features], df.loc[in_train, target]
test_x, test_y = df.loc[in_test, features], df.loc[in_test, target]

# Settings shared by every model: cross-validation setup, feature-selection and
# scaling choices, and the seed. run_model copies this dict and merges the
# model-specific hyperparameters into the copy.
search_space = {
    'k_folds': 5,
    'k_split': 'non_ts',

    'f_method': hp.choice('f_method', ['all']),
    # TODO: replace with the candidate feature counts to try; until then the
    # only option offered is "use every feature".
    'num_feats': hp.choice('num_feats', [len(features)]),

    'scaler': hp.choice('scaler', ['noscaler']),

    'SEED': 2024,
}

# Load a previously saved hyperopt run and tabulate its trials.
hyperopt_file = "path/to/hyperopt_file"  # TODO: set to the saved hyperopt results file
bestLoss, bestParams, results = modeling.load_result(hyperopt_file)

# Transpose `results` into rows; the former index becomes the 'Precision' column.
results_df = pd.DataFrame(results).T.reset_index().rename(columns={'index': 'Precision'})

# Column order: feature-selection settings first, everything else next, 'Precision' last.
leading_cols = ['f_method', 'num_feats']
middle_cols = [c for c in results_df.columns if c not in leading_cols + ['Precision']]
results_df = (
    results_df[leading_cols + middle_cols + ['Precision']]
    .rename(columns={'f_method': 'Feature selection method', 'num_feats': 'Num of Features'})
    # Bookkeeping columns that are constant across trials are dropped in one call.
    .drop(columns=['m_method', 'k_folds', 'SEED', 'k_split'])
)

# Flip the sign of the stored values.
# NOTE(review): presumably the loss was recorded as negative precision — confirm.
results_df['Precision'] = -results_df['Precision']


In [None]:
# Per-model hyperparameter spaces, keyed by model name. Every hp.choice label
# must be unique within the final search space, so each label matches its key.
model_config = {
    'lgbm': {
        'n_estimators': hp.choice('n_estimators', [1000]),
        # The label was 'n_estimators', which duplicated the entry above.
        'learning_rate': hp.choice('learning_rate', [0.1]),
        'num_leaves': hp.choice('num_leaves', [50]),
        'max_depth': hp.choice('max_depth', [5]),
        'reg_alpha': hp.choice('reg_alpha', [0.1]),
        'reg_lambda': hp.choice('reg_lambda', [0.1]),
        'colsample_bytree': hp.choice('colsample_bytree', [1.0]),
        'subsample': hp.choice('subsample', [1.0]),
    }
}

def run_model(X, y, search_space, model_type, model_config, model_dir, num_trials):
    """Run hyperparameter tuning for the given model type.

    Parameters
    ----------
    X, y
        Features and target, forwarded unchanged to
        ``modeling.hyperparameter_tuning``.
    search_space : dict
        Shared settings. A shallow copy is extended; the caller's dict is not
        modified.
    model_type : list
        Model names. All entries are offered to ``hp.choice('m_method', ...)``,
        but only the hyperparameters of the first entry are merged in.
    model_config : dict
        Maps a model name to its dict of hyperparameter spaces.
    model_dir
        Not used by this function at present (see the note in the body).
    num_trials
        Forwarded unchanged to ``modeling.hyperparameter_tuning``.

    Raises
    ------
    KeyError
        If ``model_type[0]`` is not a key of ``model_config``.
    """
    search_space_updated = search_space.copy()
    search_space_updated['m_method'] = hp.choice('m_method', model_type)
    search_space_updated.update(model_config[model_type[0]])

    # TODO: set to the file the hyperopt results should be written to.
    # NOTE(review): model_dir is accepted but never used; presumably this path
    # is meant to live under it — confirm the expected file name.
    hyperopt_file = "path/to/hyperopt_file"

    modeling.hyperparameter_tuning(X, y, num_trials, hyperopt_file, search_space_updated)



In [None]:
# Test if it works: fit a quick L1 logistic regression on the training sample.
from sklearn.linear_model import LogisticRegression
lr = LogisticRegression(penalty='l1', C=0.9, solver='saga', n_jobs=-1)
lr.fit(train_x, train_y)

model_type = ['lgbm']

# Tune on the training sample. The original passed test_x/test_y here, which
# would select hyperparameters on the hold-out data that is scored later.
# NOTE(review): model_dir is not defined in the visible cells — define it first.
run_model(train_x, train_y, search_space, model_type, model_config, model_dir, 10)

threshold_list = [0.8, 0.85, 0.9, 0.91, 0.92, 0.93, 0.94, 0.95, 0.96, 0.97, 0.98, 0.99]

model_list = ['lgbm']
list_of_sample = [['test'], ['outoftime'], ['test', 'outoftime']]

# NOTE(review): non_predictive_cols is not defined in the visible cells either.
score_df = final_fitting(modeling, model_dir, model_list, df, features, target, non_predictive_cols)

# Reuse list_of_sample instead of repeating the same literal.
mpr_all = mpr_report(model_list, list_of_sample, threshold_list, score_df, target)
# NOTE(review): assumes mpr_report returns a pandas DataFrame; `tocsv` is not a DataFrame method.
mpr_all.to_csv('save to dir')

model_list = ['lgbm']  # was misspelled 'lgmb'
features_df = pd.DataFrame({'features': list(predictive_cols['0'])})