## Credit Card Approval Experiment Tracking

In [1]:
!python -V

Python 3.11.3


In [2]:
import requests
import pickle

import pandas as pd

import mlflow

from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.feature_extraction import DictVectorizer
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Lasso
from sklearn.linear_model import Ridge
from sklearn.metrics import mean_squared_error
from sklearn.svm import LinearSVR

from sklearn.model_selection import train_test_split
from imblearn.over_sampling import SMOTE

import os
from random import random, randint
from mlflow import log_metric, log_param, log_params, log_artifact

from tqdm import tqdm

In [3]:
# Data download.
data = pd.read_csv('data/application_record.csv')
record = pd.read_csv('data/credit_record.csv')

**Data Exploration**

In [4]:
# find all users' account open month.
begin_month=pd.DataFrame(record.groupby(["ID"])["MONTHS_BALANCE"].agg(min))
begin_month=begin_month.rename(columns={'MONTHS_BALANCE':'begin_month'}) 

new_data=pd.merge(data,begin_month,how="left",on="ID") #merge to record datatrain_raw_data.head()

In [5]:
# Assuming 'record' is your DataFrame containing the 'STATUS' and 'dep_value' columns.
record['dep_value'] = None
record.loc[record['STATUS'].isin(['2', '3', '4', '5']), 'dep_value'] = 'Yes'

cpunt=record.groupby('ID').count()
cpunt['dep_value'][cpunt['dep_value'] > 0]='Yes' 
cpunt['dep_value'][cpunt['dep_value'] == 0]='No' 
cpunt = cpunt[['dep_value']]

merge_data=pd.merge(new_data,cpunt,how='inner',on='ID')
merge_data.head()

Unnamed: 0,ID,CODE_GENDER,FLAG_OWN_CAR,FLAG_OWN_REALTY,CNT_CHILDREN,AMT_INCOME_TOTAL,NAME_INCOME_TYPE,NAME_EDUCATION_TYPE,NAME_FAMILY_STATUS,NAME_HOUSING_TYPE,DAYS_BIRTH,DAYS_EMPLOYED,FLAG_MOBIL,FLAG_WORK_PHONE,FLAG_PHONE,FLAG_EMAIL,OCCUPATION_TYPE,CNT_FAM_MEMBERS,begin_month,dep_value
0,5008804,M,Y,Y,0,427500.0,Working,Higher education,Civil marriage,Rented apartment,-12005,-4542,1,1,0,0,,2.0,-15.0,No
1,5008805,M,Y,Y,0,427500.0,Working,Higher education,Civil marriage,Rented apartment,-12005,-4542,1,1,0,0,,2.0,-14.0,No
2,5008806,M,Y,Y,0,112500.0,Working,Secondary / secondary special,Married,House / apartment,-21474,-1134,1,0,0,0,Security staff,2.0,-29.0,No
3,5008808,F,N,Y,0,270000.0,Commercial associate,Secondary / secondary special,Single / not married,House / apartment,-19110,-3051,1,0,1,1,Sales staff,1.0,-4.0,No
4,5008809,F,N,Y,0,270000.0,Commercial associate,Secondary / secondary special,Single / not married,House / apartment,-19110,-3051,1,0,1,1,Sales staff,1.0,-26.0,No


In [6]:
# Assuming 'new_data' is your DataFrame.
# Check for missing values (NaN) in the DataFrame
print(merge_data.isna().sum())

# Drop rows with any NaN values
merge_data.dropna(inplace=True)

# Display the first few rows of the cleaned DataFrame
merge_data.head()

ID                         0
CODE_GENDER                0
FLAG_OWN_CAR               0
FLAG_OWN_REALTY            0
CNT_CHILDREN               0
AMT_INCOME_TOTAL           0
NAME_INCOME_TYPE           0
NAME_EDUCATION_TYPE        0
NAME_FAMILY_STATUS         0
NAME_HOUSING_TYPE          0
DAYS_BIRTH                 0
DAYS_EMPLOYED              0
FLAG_MOBIL                 0
FLAG_WORK_PHONE            0
FLAG_PHONE                 0
FLAG_EMAIL                 0
OCCUPATION_TYPE        11323
CNT_FAM_MEMBERS            0
begin_month                0
dep_value                  0
dtype: int64


Unnamed: 0,ID,CODE_GENDER,FLAG_OWN_CAR,FLAG_OWN_REALTY,CNT_CHILDREN,AMT_INCOME_TOTAL,NAME_INCOME_TYPE,NAME_EDUCATION_TYPE,NAME_FAMILY_STATUS,NAME_HOUSING_TYPE,DAYS_BIRTH,DAYS_EMPLOYED,FLAG_MOBIL,FLAG_WORK_PHONE,FLAG_PHONE,FLAG_EMAIL,OCCUPATION_TYPE,CNT_FAM_MEMBERS,begin_month,dep_value
2,5008806,M,Y,Y,0,112500.0,Working,Secondary / secondary special,Married,House / apartment,-21474,-1134,1,0,0,0,Security staff,2.0,-29.0,No
3,5008808,F,N,Y,0,270000.0,Commercial associate,Secondary / secondary special,Single / not married,House / apartment,-19110,-3051,1,0,1,1,Sales staff,1.0,-4.0,No
4,5008809,F,N,Y,0,270000.0,Commercial associate,Secondary / secondary special,Single / not married,House / apartment,-19110,-3051,1,0,1,1,Sales staff,1.0,-26.0,No
5,5008810,F,N,Y,0,270000.0,Commercial associate,Secondary / secondary special,Single / not married,House / apartment,-19110,-3051,1,0,1,1,Sales staff,1.0,-26.0,No
6,5008811,F,N,Y,0,270000.0,Commercial associate,Secondary / secondary special,Single / not married,House / apartment,-19110,-3051,1,0,1,1,Sales staff,1.0,-38.0,No


In [7]:
# Now, we remove OCCUPATION_TYPE. As we said, it have too many missed data
# In addition, we will get rid of FLAG_MOBIL, FLAG_WORK_PHONE, FLAG_PHONE and FLAG_EMAIL. 
# These parameters do not affect in any way whether a person is creditworthy or not.

merge_data.drop(['ID','FLAG_WORK_PHONE','FLAG_PHONE','FLAG_EMAIL'], axis=1, inplace=True) 

In [8]:
# Now, let's convert all non-numeric data to numeric data using LabelEncoder().
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()
for x in merge_data:
    if merge_data[x].dtypes == 'object':
        merge_data[x] = le.fit_transform(merge_data[x])

In [9]:
data = merge_data.copy()

In [10]:
print(data.isna().sum())

CODE_GENDER            0
FLAG_OWN_CAR           0
FLAG_OWN_REALTY        0
CNT_CHILDREN           0
AMT_INCOME_TOTAL       0
NAME_INCOME_TYPE       0
NAME_EDUCATION_TYPE    0
NAME_FAMILY_STATUS     0
NAME_HOUSING_TYPE      0
DAYS_BIRTH             0
DAYS_EMPLOYED          0
FLAG_MOBIL             0
OCCUPATION_TYPE        0
CNT_FAM_MEMBERS        0
begin_month            0
dep_value              0
dtype: int64


In [11]:
data = data.astype(int)
data.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 25134 entries, 2 to 36456
Data columns (total 16 columns):
 #   Column               Non-Null Count  Dtype
---  ------               --------------  -----
 0   CODE_GENDER          25134 non-null  int32
 1   FLAG_OWN_CAR         25134 non-null  int32
 2   FLAG_OWN_REALTY      25134 non-null  int32
 3   CNT_CHILDREN         25134 non-null  int32
 4   AMT_INCOME_TOTAL     25134 non-null  int32
 5   NAME_INCOME_TYPE     25134 non-null  int32
 6   NAME_EDUCATION_TYPE  25134 non-null  int32
 7   NAME_FAMILY_STATUS   25134 non-null  int32
 8   NAME_HOUSING_TYPE    25134 non-null  int32
 9   DAYS_BIRTH           25134 non-null  int32
 10  DAYS_EMPLOYED        25134 non-null  int32
 11  FLAG_MOBIL           25134 non-null  int32
 12  OCCUPATION_TYPE      25134 non-null  int32
 13  CNT_FAM_MEMBERS      25134 non-null  int32
 14  begin_month          25134 non-null  int32
 15  dep_value            25134 non-null  int32
dtypes: int32(16)
memory us

In [12]:
X = data.iloc[:,1:-1] # X value contains all the variables except labels
y = data.iloc[:,-1] # these are the labels
y

2        0
3        0
4        0
5        0
6        0
        ..
36452    1
36453    1
36454    1
36455    1
36456    1
Name: dep_value, Length: 25134, dtype: int32

In [13]:
# Split train and test data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)

In [14]:
oversample = SMOTE()
X_balanced, y_balanced = oversample.fit_resample(X_train, y_train)
X_test_balanced, y_test_balanced = oversample.fit_resample(X_test, y_test)

## Simple Experimnet 

**Data Preprocessing**

In [15]:
mlflow.autolog()

lr = Ridge()
lr.fit(X_balanced, y_balanced)

y_pred = lr.predict(X_test_balanced)

mean_squared_error(y_test_balanced, y_pred, squared=False)



0.4314495454189201

## MLflow tracking

In [16]:
import mlflow

In [17]:
mlflow.set_tracking_uri("sqlite:///mlflow.db")
mlflow.set_experiment("Test LR")

2023/07/22 21:32:42 INFO mlflow.store.db.utils: Creating initial MLflow database tables...
2023/07/22 21:32:42 INFO mlflow.store.db.utils: Updating database tables
INFO  [alembic.runtime.migration] Context impl SQLiteImpl.
INFO  [alembic.runtime.migration] Will assume non-transactional DDL.
INFO  [alembic.runtime.migration] Running upgrade  -> 451aebb31d03, add metric step
INFO  [alembic.runtime.migration] Running upgrade 451aebb31d03 -> 90e64c465722, migrate user column to tags
INFO  [alembic.runtime.migration] Running upgrade 90e64c465722 -> 181f10493468, allow nulls for metric values
INFO  [alembic.runtime.migration] Running upgrade 181f10493468 -> df50e92ffc5e, Add Experiment Tags Table
INFO  [alembic.runtime.migration] Running upgrade df50e92ffc5e -> 7ac759974ad8, Update run tags with larger limit
INFO  [alembic.runtime.migration] Running upgrade 7ac759974ad8 -> 89d4b8295536, create latest metrics table
INFO  [89d4b8295536_create_latest_metrics_table_py] Migration complete!
INFO  

<Experiment: artifact_location='file:///D:/Project Pycharm/creditcard_approval_prediction/mlruns/1', creation_time=1690036363640, experiment_id='1', last_update_time=1690036363640, lifecycle_stage='active', name='Test LR', tags={}>

In [18]:
with mlflow.start_run():
    mlflow.set_tag("workspace", "Homework 2")
    
    # Log the data file paths as parameters
    mlflow.log_param('application_record_file', 'data/application_record.csv')
    mlflow.log_param('credit_record_file', 'data/credit_record.csv')
    
    alpha = 0.99
    mlflow.log_param("alpha", alpha)
    
    lr = Lasso(alpha)
    lr.fit(X_balanced, y_balanced) 
    y_pred = lr.predict(X_test_balanced)

    rmse = mean_squared_error(y_test_balanced, y_pred, squared=False)
    
    mlflow.log_metric("rmse", rmse)

## Hyperparameters Optimization

In [19]:
import xgboost as xgb

from hyperopt import fmin, tpe, hp, STATUS_OK, Trials
from hyperopt.pyll import scope

2023/07/22 21:37:45 INFO mlflow.tracking.fluent: Autologging successfully enabled for xgboost.


In [20]:
mlflow.set_tracking_uri("sqlite:///mlflow.db")
mlflow.set_experiment("test XGB")

2023/07/22 21:37:51 INFO mlflow.tracking.fluent: Experiment with name 'test XGB' does not exist. Creating a new experiment.


<Experiment: artifact_location='file:///D:/Project Pycharm/creditcard_approval_prediction/mlruns/2', creation_time=1690036671665, experiment_id='2', last_update_time=1690036671665, lifecycle_stage='active', name='test XGB', tags={}>

In [None]:
# X_balanced, y_balanced = oversample.fit_resample(X_train, y_train)
# X_test_balanced, y_test_balanced = oversample.fit_resample(X_test, y_test)

In [21]:
train = xgb.DMatrix(X_balanced, label=y_balanced)
validation = xgb.DMatrix(X_test_balanced, label=y_test_balanced)

In [22]:
def objective(params):
    with mlflow.start_run():
        num_boost_round = 500
        early_stopping_rounds = 50
        
        mlflow.log_params(params)
        mlflow.log_param('num_boost_round', num_boost_round)
        mlflow.log_param('early_stopping_rounds', early_stopping_rounds)
        mlflow.log_param('train_data_name', 'X_balanced')
        mlflow.log_param('validation_data_name', 'X_test_balanced')
        mlflow.set_tag('model', 'xgboost')

        booster = xgb.train(
            params = params,
            dtrain = train,
            evals = [(validation, "validation")],
            num_boost_round = num_boost_round,
            early_stopping_rounds = early_stopping_rounds
        )
        
        y_pred = booster.predict(validation)
        rmse = mean_squared_error(y_test_balanced, y_pred, squared=False)
        mlflow.log_metric('rmse', rmse)
        return {'loss': rmse, 'status': STATUS_OK}

In [23]:
grid_search = {
    'max_depth': scope.int(hp.quniform('max_depth', 4, 100, 1)),
    'reg_alpha': hp.loguniform('reg_alpha', -5, -1),
    'reg_lambda': hp.loguniform('reg_lambda', -6, -1),
    'min_child': hp.loguniform('min_child', -1, 3),
    'seed': 111,
    'objective': 'reg:linear'
}

In [24]:
best_model = fmin(
    fn=objective,
    space=grid_search,
    algo=tpe.suggest,
    max_evals=30,
    trials=Trials()
)

Parameters: { "min_child" } are not used.

[0]	validation-rmse:0.44729                                                                                            
[1]	validation-rmse:0.43500                                                                                            
[2]	validation-rmse:0.43615                                                                                            
[3]	validation-rmse:0.44473                                                                                            
[4]	validation-rmse:0.45009                                                                                            
[5]	validation-rmse:0.45454                                                                                            
[6]	validation-rmse:0.45906                                                                                            
[7]	validation-rmse:0.46275                                                                                          





Parameters: { "min_child" } are not used.

[0]	validation-rmse:0.44742                                                                                            
[1]	validation-rmse:0.43493                                                                                            
[2]	validation-rmse:0.43842                                                                                            
[3]	validation-rmse:0.44589                                                                                            
[4]	validation-rmse:0.45048                                                                                            
[5]	validation-rmse:0.45532                                                                                            
[6]	validation-rmse:0.45920                                                                                            
[7]	validation-rmse:0.46223                                                                                          




Parameters: { "min_child" } are not used.

[0]	validation-rmse:0.44708                                                                                            
[1]	validation-rmse:0.43282                                                                                            
[2]	validation-rmse:0.43265                                                                                            
[3]	validation-rmse:0.43685                                                                                            
[4]	validation-rmse:0.44294                                                                                            
[5]	validation-rmse:0.44809                                                                                            
[6]	validation-rmse:0.45258                                                                                            
[7]	validation-rmse:0.45660                                                                                          




Parameters: { "min_child" } are not used.

[0]	validation-rmse:0.44652                                                                                            
[1]	validation-rmse:0.43277                                                                                            
[2]	validation-rmse:0.43144                                                                                            
[3]	validation-rmse:0.43612                                                                                            
[4]	validation-rmse:0.44174                                                                                            
[5]	validation-rmse:0.44656                                                                                            
[6]	validation-rmse:0.45020                                                                                            
[7]	validation-rmse:0.45300                                                                                          




Parameters: { "min_child" } are not used.

[0]	validation-rmse:0.44745                                                                                            
[1]	validation-rmse:0.43492                                                                                            
[2]	validation-rmse:0.43852                                                                                            
[3]	validation-rmse:0.44610                                                                                            
[4]	validation-rmse:0.45074                                                                                            
[5]	validation-rmse:0.45555                                                                                            
[6]	validation-rmse:0.45931                                                                                            
[7]	validation-rmse:0.46217                                                                                          




Parameters: { "min_child" } are not used.

[0]	validation-rmse:0.44675                                                                                            
[1]	validation-rmse:0.43051                                                                                            
[2]	validation-rmse:0.42639                                                                                            
[3]	validation-rmse:0.42879                                                                                            
[4]	validation-rmse:0.42967                                                                                            
[5]	validation-rmse:0.43366                                                                                            
[6]	validation-rmse:0.43674                                                                                            
[7]	validation-rmse:0.43834                                                                                          




Parameters: { "min_child" } are not used.

[0]	validation-rmse:0.44887                                                                                            
[1]	validation-rmse:0.43915                                                                                            
[2]	validation-rmse:0.44602                                                                                            
[3]	validation-rmse:0.45786                                                                                            
[4]	validation-rmse:0.46690                                                                                            
[5]	validation-rmse:0.47164                                                                                            
[6]	validation-rmse:0.47652                                                                                            
[7]	validation-rmse:0.47898                                                                                          




Parameters: { "min_child" } are not used.

[0]	validation-rmse:0.44642                                                                                            
[1]	validation-rmse:0.43258                                                                                            
[2]	validation-rmse:0.43183                                                                                            
[3]	validation-rmse:0.43530                                                                                            
[4]	validation-rmse:0.44015                                                                                            
[5]	validation-rmse:0.44450                                                                                            
[6]	validation-rmse:0.44792                                                                                            
[7]	validation-rmse:0.45114                                                                                          




Parameters: { "min_child" } are not used.

[0]	validation-rmse:0.44743                                                                                            
[1]	validation-rmse:0.43038                                                                                            
[2]	validation-rmse:0.42685                                                                                            
[3]	validation-rmse:0.42887                                                                                            
[4]	validation-rmse:0.43352                                                                                            
[5]	validation-rmse:0.43693                                                                                            
[6]	validation-rmse:0.44046                                                                                            
[7]	validation-rmse:0.44320                                                                                          




Parameters: { "min_child" } are not used.

[0]	validation-rmse:0.44645                                                                                            
[1]	validation-rmse:0.42300                                                                                            
[2]	validation-rmse:0.41713                                                                                            
[3]	validation-rmse:0.41883                                                                                            
[4]	validation-rmse:0.42098                                                                                            
[5]	validation-rmse:0.42483                                                                                            
[6]	validation-rmse:0.42933                                                                                            
[7]	validation-rmse:0.43092                                                                                          




Parameters: { "min_child" } are not used.

[0]	validation-rmse:0.44927                                                                                            
[1]	validation-rmse:0.44026                                                                                            
[2]	validation-rmse:0.44798                                                                                            
[3]	validation-rmse:0.46050                                                                                            
[4]	validation-rmse:0.47222                                                                                            
[5]	validation-rmse:0.48021                                                                                            
[6]	validation-rmse:0.48711                                                                                            
[7]	validation-rmse:0.48995                                                                                          




Parameters: { "min_child" } are not used.

[0]	validation-rmse:0.44755                                                                                            
[1]	validation-rmse:0.43642                                                                                            
[2]	validation-rmse:0.44087                                                                                            
[3]	validation-rmse:0.44943                                                                                            
[4]	validation-rmse:0.45389                                                                                            
[5]	validation-rmse:0.45792                                                                                            
[6]	validation-rmse:0.46141                                                                                            
[7]	validation-rmse:0.46386                                                                                          




Parameters: { "min_child" } are not used.

[0]	validation-rmse:0.44294                                                                                            
[1]	validation-rmse:0.41366                                                                                            
[2]	validation-rmse:0.39944                                                                                            
[3]	validation-rmse:0.39305                                                                                            
[4]	validation-rmse:0.38937                                                                                            
[5]	validation-rmse:0.38948                                                                                            
[6]	validation-rmse:0.38874                                                                                            
[7]	validation-rmse:0.38862                                                                                          




Parameters: { "min_child" } are not used.

[0]	validation-rmse:0.44707                                                                                            
[1]	validation-rmse:0.43356                                                                                            
[2]	validation-rmse:0.43276                                                                                            
[3]	validation-rmse:0.43553                                                                                            
[4]	validation-rmse:0.44249                                                                                            
[5]	validation-rmse:0.44683                                                                                            
[6]	validation-rmse:0.45105                                                                                            
[7]	validation-rmse:0.45443                                                                                          




Parameters: { "min_child" } are not used.

[0]	validation-rmse:0.44746                                                                                            
[1]	validation-rmse:0.43490                                                                                            
[2]	validation-rmse:0.43890                                                                                            
[3]	validation-rmse:0.44257                                                                                            
[4]	validation-rmse:0.44504                                                                                            
[5]	validation-rmse:0.44982                                                                                            
[6]	validation-rmse:0.45321                                                                                            
[7]	validation-rmse:0.45598                                                                                          




Parameters: { "min_child" } are not used.

[0]	validation-rmse:0.44693                                                                                            
[1]	validation-rmse:0.43328                                                                                            
[2]	validation-rmse:0.43300                                                                                            
[3]	validation-rmse:0.43598                                                                                            
[4]	validation-rmse:0.44100                                                                                            
[5]	validation-rmse:0.44306                                                                                            
[6]	validation-rmse:0.44585                                                                                            
[7]	validation-rmse:0.44788                                                                                          




Parameters: { "min_child" } are not used.

[0]	validation-rmse:0.44746                                                                                            
[1]	validation-rmse:0.43491                                                                                            
[2]	validation-rmse:0.43877                                                                                            
[3]	validation-rmse:0.44238                                                                                            
[4]	validation-rmse:0.44639                                                                                            
[5]	validation-rmse:0.44960                                                                                            
[6]	validation-rmse:0.45195                                                                                            
[7]	validation-rmse:0.45399                                                                                          




Parameters: { "min_child" } are not used.

[0]	validation-rmse:0.44766                                                                                            
[1]	validation-rmse:0.43654                                                                                            
[2]	validation-rmse:0.43949                                                                                            
[3]	validation-rmse:0.44318                                                                                            
[4]	validation-rmse:0.44849                                                                                            
[5]	validation-rmse:0.45169                                                                                            
[6]	validation-rmse:0.45541                                                                                            
[7]	validation-rmse:0.45808                                                                                          




Parameters: { "min_child" } are not used.

[0]	validation-rmse:0.44749                                                                                            
[1]	validation-rmse:0.43536                                                                                            
[2]	validation-rmse:0.43825                                                                                            
[3]	validation-rmse:0.44477                                                                                            
[4]	validation-rmse:0.44894                                                                                            
[5]	validation-rmse:0.45327                                                                                            
[6]	validation-rmse:0.45713                                                                                            
[7]	validation-rmse:0.46015                                                                                          




Parameters: { "min_child" } are not used.

[0]	validation-rmse:0.44402                                                                                            
[1]	validation-rmse:0.42166                                                                                            
[2]	validation-rmse:0.41720                                                                                            
[3]	validation-rmse:0.42191                                                                                            
[4]	validation-rmse:0.42747                                                                                            
[5]	validation-rmse:0.43131                                                                                            
[6]	validation-rmse:0.43679                                                                                            
[7]	validation-rmse:0.44133                                                                                          




Parameters: { "min_child" } are not used.

[0]	validation-rmse:0.46940                                                                                            
[1]	validation-rmse:0.45155                                                                                            
[2]	validation-rmse:0.43804                                                                                            
[3]	validation-rmse:0.42651                                                                                            
[4]	validation-rmse:0.41845                                                                                            
[5]	validation-rmse:0.41132                                                                                            
[6]	validation-rmse:0.40749                                                                                            
[7]	validation-rmse:0.40505                                                                                          

[64]	validation-rmse:0.36940                                                                                           
[65]	validation-rmse:0.36926                                                                                           
[66]	validation-rmse:0.36933                                                                                           
[67]	validation-rmse:0.36910                                                                                           
[68]	validation-rmse:0.37012                                                                                           
[69]	validation-rmse:0.36989                                                                                           
[70]	validation-rmse:0.36974                                                                                           
[71]	validation-rmse:0.36949                                                                                           
[72]	validation-rmse:0.36945            

[132]	validation-rmse:0.36909                                                                                          
[133]	validation-rmse:0.36844                                                                                          
[134]	validation-rmse:0.36815                                                                                          
[135]	validation-rmse:0.36787                                                                                          
[136]	validation-rmse:0.36788                                                                                          
[137]	validation-rmse:0.36781                                                                                          
[138]	validation-rmse:0.36780                                                                                          
[139]	validation-rmse:0.36723                                                                                          
[140]	validation-rmse:0.36732           

[200]	validation-rmse:0.36622                                                                                          
[201]	validation-rmse:0.36634                                                                                          
[202]	validation-rmse:0.36625                                                                                          
[203]	validation-rmse:0.36663                                                                                          
[204]	validation-rmse:0.36618                                                                                          
[205]	validation-rmse:0.36624                                                                                          
[206]	validation-rmse:0.36611                                                                                          
[207]	validation-rmse:0.36612                                                                                          
[208]	validation-rmse:0.36619           

[268]	validation-rmse:0.36604                                                                                          
[269]	validation-rmse:0.36606                                                                                          
[270]	validation-rmse:0.36636                                                                                          
[271]	validation-rmse:0.36654                                                                                          
[272]	validation-rmse:0.36648                                                                                          
[273]	validation-rmse:0.36639                                                                                          
[274]	validation-rmse:0.36635                                                                                          
[275]	validation-rmse:0.36624                                                                                          
[276]	validation-rmse:0.36606           




Parameters: { "min_child" } are not used.

[0]	validation-rmse:0.45131                                                                                            
[1]	validation-rmse:0.42418                                                                                            
[2]	validation-rmse:0.40986                                                                                            
[3]	validation-rmse:0.40439                                                                                            
[4]	validation-rmse:0.39681                                                                                            
[5]	validation-rmse:0.39277                                                                                            
[6]	validation-rmse:0.38922                                                                                            
[7]	validation-rmse:0.38564                                                                                          




Parameters: { "min_child" } are not used.

[0]	validation-rmse:0.46293                                                                                            
[1]	validation-rmse:0.43871                                                                                            
[2]	validation-rmse:0.42523                                                                                            
[3]	validation-rmse:0.41704                                                                                            
[4]	validation-rmse:0.40539                                                                                            
[5]	validation-rmse:0.39987                                                                                            
[6]	validation-rmse:0.39615                                                                                            
[7]	validation-rmse:0.39235                                                                                          

[64]	validation-rmse:0.37532                                                                                           
[65]	validation-rmse:0.37531                                                                                           
[66]	validation-rmse:0.37516                                                                                           
[67]	validation-rmse:0.37512                                                                                           
[68]	validation-rmse:0.37503                                                                                           
[69]	validation-rmse:0.37557                                                                                           
[70]	validation-rmse:0.37640                                                                                           
[71]	validation-rmse:0.37617                                                                                           
[72]	validation-rmse:0.37600            




Parameters: { "min_child" } are not used.

[0]	validation-rmse:0.44437                                                                                            
[1]	validation-rmse:0.41906                                                                                            
[2]	validation-rmse:0.41527                                                                                            
[3]	validation-rmse:0.41617                                                                                            
[4]	validation-rmse:0.41868                                                                                            
[5]	validation-rmse:0.42288                                                                                            
[6]	validation-rmse:0.42516                                                                                            
[7]	validation-rmse:0.42690                                                                                          




Parameters: { "min_child" } are not used.

[0]	validation-rmse:0.46940                                                                                            
[1]	validation-rmse:0.45154                                                                                            
[2]	validation-rmse:0.43804                                                                                            
[3]	validation-rmse:0.42650                                                                                            
[4]	validation-rmse:0.41845                                                                                            
[5]	validation-rmse:0.41131                                                                                            
[6]	validation-rmse:0.40748                                                                                            
[7]	validation-rmse:0.40505                                                                                          

[64]	validation-rmse:0.37392                                                                                           
[65]	validation-rmse:0.37422                                                                                           
[66]	validation-rmse:0.37409                                                                                           
[67]	validation-rmse:0.37383                                                                                           
[68]	validation-rmse:0.37364                                                                                           
[69]	validation-rmse:0.37339                                                                                           
[70]	validation-rmse:0.37332                                                                                           
[71]	validation-rmse:0.37309                                                                                           
[72]	validation-rmse:0.37349            




Parameters: { "min_child" } are not used.

[0]	validation-rmse:0.44671                                                                                            
[1]	validation-rmse:0.42516                                                                                            
[2]	validation-rmse:0.42077                                                                                            
[3]	validation-rmse:0.42415                                                                                            
[4]	validation-rmse:0.42776                                                                                            
[5]	validation-rmse:0.43010                                                                                            
[6]	validation-rmse:0.43337                                                                                            
[7]	validation-rmse:0.43611                                                                                          




Parameters: { "min_child" } are not used.

[0]	validation-rmse:0.43882                                                                                            
[1]	validation-rmse:0.41101                                                                                            
[2]	validation-rmse:0.40263                                                                                            
[3]	validation-rmse:0.40300                                                                                            
[4]	validation-rmse:0.40200                                                                                            
[5]	validation-rmse:0.40529                                                                                            
[6]	validation-rmse:0.40892                                                                                            
[7]	validation-rmse:0.41145                                                                                          




Parameters: { "min_child" } are not used.

[0]	validation-rmse:0.44754                                                                                            
[1]	validation-rmse:0.43587                                                                                            
[2]	validation-rmse:0.43900                                                                                            
[3]	validation-rmse:0.44593                                                                                            
[4]	validation-rmse:0.45425                                                                                            
[5]	validation-rmse:0.45905                                                                                            
[6]	validation-rmse:0.46274                                                                                            
[7]	validation-rmse:0.46511                                                                                          




Parameters: { "min_child" } are not used.

[0]	validation-rmse:0.44661                                                                                            
[1]	validation-rmse:0.42890                                                                                            
[2]	validation-rmse:0.42449                                                                                            
[3]	validation-rmse:0.42557                                                                                            
[4]	validation-rmse:0.42772                                                                                            
[5]	validation-rmse:0.43188                                                                                            
[6]	validation-rmse:0.43500                                                                                            
[7]	validation-rmse:0.43784                                                                                          




Parameters: { "min_child" } are not used.

[0]	validation-rmse:0.46292                                                                                            
[1]	validation-rmse:0.43870                                                                                            
[2]	validation-rmse:0.42520                                                                                            
[3]	validation-rmse:0.41701                                                                                            
[4]	validation-rmse:0.40392                                                                                            
[5]	validation-rmse:0.39834                                                                                            
[6]	validation-rmse:0.39461                                                                                            
[7]	validation-rmse:0.39107                                                                                          

[64]	validation-rmse:0.37315                                                                                           
[65]	validation-rmse:0.37335                                                                                           
[66]	validation-rmse:0.37305                                                                                           
[67]	validation-rmse:0.37314                                                                                           
[68]	validation-rmse:0.37295                                                                                           
[69]	validation-rmse:0.37311                                                                                           
[70]	validation-rmse:0.37289                                                                                           
[71]	validation-rmse:0.37324                                                                                           
[72]	validation-rmse:0.37328            




100%|███████████████████████████████████████████████| 30/30 [06:30<00:00, 13.03s/trial, best loss: 0.36569041725866674]


In [25]:
best_model

{'max_depth': 4.0,
 'min_child': 20.08141443058751,
 'reg_alpha': 0.3410417543784273,
 'reg_lambda': 0.058102852057670594}

## Train the Best Model

In [26]:
import xgboost as xgb

from hyperopt import fmin, tpe, hp, STATUS_OK, Trials
from hyperopt.pyll import scope

import mlflow.xgboost

In [27]:
# Enable MLflow autologging for XGBoost
mlflow.xgboost.autolog()

In [28]:
mlflow.set_tracking_uri("sqlite:///mlflow.db")
mlflow.set_experiment("Best Model")

2023/07/22 21:45:27 INFO mlflow.tracking.fluent: Experiment with name 'Best Model' does not exist. Creating a new experiment.


<Experiment: artifact_location='file:///D:/Project Pycharm/creditcard_approval_prediction/mlruns/3', creation_time=1690037127009, experiment_id='3', last_update_time=1690037127009, lifecycle_stage='active', name='Best Model', tags={}>

In [30]:
#We took the best params from the MLflow interface and copien them here

best_params = {'max_depth': 4,
 'min_child': 20.08141443058751,
 'reg_alpha': 0.3410417543784273,
 'reg_lambda': 0.058102852057670594}

mlflow.xgboost.autolog()

booster = xgb.train(
    params = best_params,
    dtrain = train,
    evals = [(validation, "validation")],
    num_boost_round = 500,
    early_stopping_rounds = 50,
)

2023/07/22 21:46:19 INFO mlflow.utils.autologging_utils: Created MLflow autologging run with ID '9d1ab1e46be14ef6a690774f9508e98e', which will track hyperparameters, performance metrics, model artifacts, and lineage information for the current xgboost workflow


Parameters: { "min_child" } are not used.

[0]	validation-rmse:0.46940
[1]	validation-rmse:0.45155
[2]	validation-rmse:0.43804
[3]	validation-rmse:0.42651
[4]	validation-rmse:0.41845
[5]	validation-rmse:0.41132
[6]	validation-rmse:0.40749
[7]	validation-rmse:0.40505
[8]	validation-rmse:0.39916
[9]	validation-rmse:0.39406
[10]	validation-rmse:0.39294
[11]	validation-rmse:0.39229
[12]	validation-rmse:0.39344
[13]	validation-rmse:0.39086
[14]	validation-rmse:0.39144
[15]	validation-rmse:0.38880
[16]	validation-rmse:0.38523
[17]	validation-rmse:0.38454
[18]	validation-rmse:0.38464
[19]	validation-rmse:0.38188
[20]	validation-rmse:0.38136
[21]	validation-rmse:0.38184
[22]	validation-rmse:0.38181
[23]	validation-rmse:0.38015
[24]	validation-rmse:0.38012
[25]	validation-rmse:0.37820
[26]	validation-rmse:0.37753
[27]	validation-rmse:0.37760
[28]	validation-rmse:0.37692
[29]	validation-rmse:0.37609
[30]	validation-rmse:0.37731
[31]	validation-rmse:0.37743
[32]	validation-rmse:0.37738
[33]	valid

[271]	validation-rmse:0.36654
[272]	validation-rmse:0.36648
[273]	validation-rmse:0.36639
[274]	validation-rmse:0.36635
[275]	validation-rmse:0.36624
[276]	validation-rmse:0.36606
[277]	validation-rmse:0.36601
[278]	validation-rmse:0.36603
[279]	validation-rmse:0.36615
[280]	validation-rmse:0.36588
[281]	validation-rmse:0.36598
[282]	validation-rmse:0.36593
[283]	validation-rmse:0.36595
[284]	validation-rmse:0.36582
[285]	validation-rmse:0.36589
[286]	validation-rmse:0.36589
[287]	validation-rmse:0.36581
[288]	validation-rmse:0.36588
[289]	validation-rmse:0.36561
[290]	validation-rmse:0.36596
[291]	validation-rmse:0.36597
[292]	validation-rmse:0.36584
[293]	validation-rmse:0.36586
[294]	validation-rmse:0.36571
[295]	validation-rmse:0.36571
[296]	validation-rmse:0.36569




In [32]:
key="???"
if best_params.get(key):
    print(best_params.get(key))
else:
    print("no value")

no value


In [33]:
pd.DataFrame.from_dict([best_params])

Unnamed: 0,max_depth,min_child,reg_alpha,reg_lambda
0,4,20.081414,0.341042,0.058103


In [None]:
# X_balanced, y_balanced = oversample.fit_resample(X_train, y_train)
# X_test_balanced, y_test_balanced = oversample.fit_resample(X_test, y_test)

In [35]:
y_pred = booster.predict(validation)

rmse = mean_squared_error(y_test_balanced, y_pred, squared=False)
rmse

0.36569041725866674

## Model Logging 

In [37]:
with open('models/moodel.bin', 'wb') as f_out:
    pickle.dump(booster, f_out)

In [None]:
with open('preprocessing/process_dataframe.bin', 'wb') as f_out:
    pickle.dump(process_dataframe, f_out)

In [None]:
mlflow.set_experiment("test")
with mlflow.start_run():
    best_params = {

    }
    
    mlflow.log_params(best_params)
    mlflow.log_param('train_data_name', 'green_tripdata_2022-01.parquet')
    mlflow.log_param('validation_data_name', 'green_tripdata_2022-02.parquet')
    mlflow.set_tag('model', 'xgboost')
    
    booster = xgb.train(
    params = best_params,
    dtrain = train,
    evals = [(validation, "validation")],
    num_boost_round = 500,
    early_stopping_rounds = 50,
    )
    
    mlflow.xgboost.log_model(booster, artifact_path='mlflow_models')
    mlflow.log_artifact('preprocessing/process_dataframe.bin', artifact_path='preprocessing')
    

## Load Model

In [None]:
logged_model = 'runs:???'
loaded_model = mlflow.pyfunc.load_model(logged_model)

In [None]:
type(loaded_model)

In [None]:
y_preds = loaded_model.predict(X_val)

In [None]:
mean_squared_error(y_preds, y_val, squared=False)

In [None]:
y_preds

In [None]:
print(loaded_model.metadata.get_model_info())

## Sklearn Models

In [None]:
# X_balanced, y_balanced = oversample.fit_resample(X_train, y_train)
# X_test_balanced, y_test_balanced = oversample.fit_resample(X_test, y_test)

In [None]:
mlflow.sklearn.autolog()

for algorithm in (LinearSVR, RandomForestRegressor, GradientBoostingRegressor):
    with mlflow.start_run():
        mlflow.log_param('train_data_name', 'green_tripdata_2022-01.parquet')
        mlflow.log_param('validation_data_name', 'green_tripdata_2022-02.parquet')
        mlflow.log_artifact('preprocessing/process_dataframe.bin', artifact_path='preprocessing')
        model = algorithm()
        model.fit(X_train, y_train)
        
        preds = model.predict(X_val)
        rmse = mean_squared_error(preds, y_val, squared=False)
        mlflow.log_metric("rmse", rmse)
        

## MLflow Client

In [38]:
from mlflow.tracking import MlflowClient
from mlflow.entities import ViewType

In [39]:
MLFLOW_URI = "sqlite:///mlflow.db"

In [40]:
client = MlflowClient(MLFLOW_URI)

In [42]:
# client.list_experiments()

In [43]:
client.create_experiment(name = 'new-experimet')

'4'

In [45]:
# client.list_experiments()

In [48]:
runs = client.search_runs(
    experiment_ids='2',
    run_view_type=ViewType.ACTIVE_ONLY,
    filter_string='metrics.rmse < 7',
    max_results=5,
    order_by=["metrics.rmse ASC"],
)

In [49]:
for run in runs:
    print(f"run_id:{run.info.run_id}, metrics:{run.data.metrics['rmse']}")

run_id:97799b27d87e4927901730624322c630, metrics:0.36569041725866674
run_id:f3a8e6bdea15498b9646767f9124b3a0, metrics:0.3729975225170953
run_id:affe23b7a4094f3f8359ae9059d293c2, metrics:0.37395163591066455
run_id:3e501cce01864da9acd56d027a439c46, metrics:0.37762014014764883
run_id:c4fe60a477424608971774cb326ef6b7, metrics:0.3957139380531629
