## Credit Card Approval Experiment Tracking

In [1]:
!python -V

Python 3.10.11


In [13]:
import requests
import pickle

import pandas as pd

from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.feature_extraction import DictVectorizer
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Lasso
from sklearn.linear_model import Ridge
from sklearn.metrics import mean_squared_error
from sklearn.svm import LinearSVR

from sklearn.model_selection import train_test_split
from imblearn.over_sampling import SMOTE

from tqdm import tqdm

In [3]:
# Data download.
data = pd.read_csv('data/application_record.csv')
record = pd.read_csv('data/credit_record.csv')

**Data Exploration**

In [4]:
# find all users' account open month.
begin_month=pd.DataFrame(record.groupby(["ID"])["MONTHS_BALANCE"].agg(min))
begin_month=begin_month.rename(columns={'MONTHS_BALANCE':'begin_month'}) 

new_data=pd.merge(data,begin_month,how="left",on="ID") #merge to record datatrain_raw_data.head()

In [5]:
# Assuming 'record' is your DataFrame containing the 'STATUS' and 'dep_value' columns.
record['dep_value'] = None
record.loc[record['STATUS'].isin(['2', '3', '4', '5']), 'dep_value'] = 'Yes'

cpunt=record.groupby('ID').count()
cpunt['dep_value'][cpunt['dep_value'] > 0]='Yes' 
cpunt['dep_value'][cpunt['dep_value'] == 0]='No' 
cpunt = cpunt[['dep_value']]

merge_data=pd.merge(new_data,cpunt,how='inner',on='ID')
merge_data.head()

Unnamed: 0,ID,CODE_GENDER,FLAG_OWN_CAR,FLAG_OWN_REALTY,CNT_CHILDREN,AMT_INCOME_TOTAL,NAME_INCOME_TYPE,NAME_EDUCATION_TYPE,NAME_FAMILY_STATUS,NAME_HOUSING_TYPE,DAYS_BIRTH,DAYS_EMPLOYED,FLAG_MOBIL,FLAG_WORK_PHONE,FLAG_PHONE,FLAG_EMAIL,OCCUPATION_TYPE,CNT_FAM_MEMBERS,begin_month,dep_value
0,5008804,M,Y,Y,0,427500.0,Working,Higher education,Civil marriage,Rented apartment,-12005,-4542,1,1,0,0,,2.0,-15.0,No
1,5008805,M,Y,Y,0,427500.0,Working,Higher education,Civil marriage,Rented apartment,-12005,-4542,1,1,0,0,,2.0,-14.0,No
2,5008806,M,Y,Y,0,112500.0,Working,Secondary / secondary special,Married,House / apartment,-21474,-1134,1,0,0,0,Security staff,2.0,-29.0,No
3,5008808,F,N,Y,0,270000.0,Commercial associate,Secondary / secondary special,Single / not married,House / apartment,-19110,-3051,1,0,1,1,Sales staff,1.0,-4.0,No
4,5008809,F,N,Y,0,270000.0,Commercial associate,Secondary / secondary special,Single / not married,House / apartment,-19110,-3051,1,0,1,1,Sales staff,1.0,-26.0,No


In [47]:
# Assuming 'new_data' is your DataFrame.
# Check for missing values (NaN) in the DataFrame
print(merge_data.isna().sum())

# Drop rows with any NaN values
merge_data.dropna(inplace=True)

# Display the first few rows of the cleaned DataFrame
merge_data.head()

ID                         0
CODE_GENDER                0
FLAG_OWN_CAR               0
FLAG_OWN_REALTY            0
CNT_CHILDREN               0
AMT_INCOME_TOTAL           0
NAME_INCOME_TYPE           0
NAME_EDUCATION_TYPE        0
NAME_FAMILY_STATUS         0
NAME_HOUSING_TYPE          0
DAYS_BIRTH                 0
DAYS_EMPLOYED              0
FLAG_MOBIL                 0
FLAG_WORK_PHONE            0
FLAG_PHONE                 0
FLAG_EMAIL                 0
OCCUPATION_TYPE        11323
CNT_FAM_MEMBERS            0
begin_month                0
dep_value                  0
dtype: int64


Unnamed: 0,ID,CODE_GENDER,FLAG_OWN_CAR,FLAG_OWN_REALTY,CNT_CHILDREN,AMT_INCOME_TOTAL,NAME_INCOME_TYPE,NAME_EDUCATION_TYPE,NAME_FAMILY_STATUS,NAME_HOUSING_TYPE,DAYS_BIRTH,DAYS_EMPLOYED,FLAG_MOBIL,FLAG_WORK_PHONE,FLAG_PHONE,FLAG_EMAIL,OCCUPATION_TYPE,CNT_FAM_MEMBERS,begin_month,dep_value
2,5008806,M,Y,Y,0,112500.0,Working,Secondary / secondary special,Married,House / apartment,-21474,-1134,1,0,0,0,Security staff,2.0,-29.0,No
3,5008808,F,N,Y,0,270000.0,Commercial associate,Secondary / secondary special,Single / not married,House / apartment,-19110,-3051,1,0,1,1,Sales staff,1.0,-4.0,No
4,5008809,F,N,Y,0,270000.0,Commercial associate,Secondary / secondary special,Single / not married,House / apartment,-19110,-3051,1,0,1,1,Sales staff,1.0,-26.0,No
5,5008810,F,N,Y,0,270000.0,Commercial associate,Secondary / secondary special,Single / not married,House / apartment,-19110,-3051,1,0,1,1,Sales staff,1.0,-26.0,No
6,5008811,F,N,Y,0,270000.0,Commercial associate,Secondary / secondary special,Single / not married,House / apartment,-19110,-3051,1,0,1,1,Sales staff,1.0,-38.0,No


In [6]:
# Now, we remove OCCUPATION_TYPE. As we said, it have too many missed data
# In addition, we will get rid of FLAG_MOBIL, FLAG_WORK_PHONE, FLAG_PHONE and FLAG_EMAIL. 
# These parameters do not affect in any way whether a person is creditworthy or not.

merge_data.drop(['ID','FLAG_WORK_PHONE','FLAG_PHONE','FLAG_EMAIL'], axis=1, inplace=True) 

In [7]:
# Now, let's convert all non-numeric data to numeric data using LabelEncoder().
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()
for x in merge_data:
    if merge_data[x].dtypes == 'object':
        merge_data[x] = le.fit_transform(merge_data[x])

In [8]:
data = merge_data.copy()

In [9]:
X = data.iloc[:,1:-1] # X value contains all the variables except labels
y = data.iloc[:,-1] # these are the labels
y

0        0
1        0
2        0
3        0
4        0
        ..
36452    1
36453    1
36454    1
36455    1
36456    1
Name: dep_value, Length: 36457, dtype: int32

In [14]:
# Split train and test data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)

In [15]:
oversample = SMOTE()
X_balanced, y_balanced = oversample.fit_resample(X_train, y_train)
X_test_balanced, y_test_balanced = oversample.fit_resample(X_test, y_test)

In [15]:
def process_dataframe(data):
#     data.lpep_dropoff_datetime = pd.to_datetime(data.lpep_dropoff_datetime)
#     data.lpep_pickup_datetime = pd.to_datetime(data.lpep_pickup_datetime)

#     data['duration'] = data.lpep_dropoff_datetime - data.lpep_pickup_datetime
#     data.duration = data.duration.apply(lambda td: td.total_seconds() / 60)
#     data = data[(data.duration >= 1) & (data.duration <= 60)]
    
#     data['PULocationID'].astype(str, copy=False)
#     data['DOLocationID'].astype(str, copy=False)
    
    return data

In [17]:
# X_train = process_dataframe(train_raw_data)[num_features + cat_features]
# X_val = process_dataframe(val_raw_data)[num_features + cat_features] 

# y_train = process_dataframe(train_raw_data)['duration']
# y_val = process_dataframe(val_raw_data)['duration'] 

In [18]:
# X_val.isnull().sum()

trip_distance    0
extra            0
fare_amount      0
PULocationID     0
DOLocationID     0
dtype: int64

## Simple Experimnet 

**Data Preprocessing**

In [16]:
lr = Ridge()
lr.fit(X_balanced, y_balanced)

y_pred = lr.predict(X_test_balanced)

mean_squared_error(y_test_balanced, y_pred, squared=False)

0.4289015372655376

## MLflow tracking

In [17]:
import mlflow

In [18]:
mlflow.set_tracking_uri("sqlite:///mlflow.db")
mlflow.set_experiment("test")

<Experiment: artifact_location='file:///D:/Project Pycharm/creditcard_approval_prediction/mlruns/1', creation_time=1690009378628, experiment_id='1', last_update_time=1690009378628, lifecycle_stage='active', name='test', tags={}>

In [64]:
with mlflow.start_run():
    mlflow.set_tag("workspace", "in_class")
    mlflow.log_param("dataset", "data/")
#     mlflow.log_param("validation_data_name", "green_tripdata_2021-02.parquet")
    
    alpha = 0.99
    mlflow.log_param("alpha", alpha)
    
    lr = Lasso(alpha)
    lr.fit(X_balanced, y_balanced) 
    y_pred = lr.predict(X_test_balanced)

    rmse = mean_squared_error(y_test_balanced, y_pred, squared=False)
    
    mlflow.log_metric("rmse", rmse)

## Hyperparameters Optimization

In [56]:
import xgboost as xgb

from hyperopt import fmin, tpe, hp, STATUS_OK, Trials
from hyperopt.pyll import scope

In [57]:
mlflow.set_tracking_uri("sqlite:///mlflow.db")
mlflow.set_experiment("test")

<Experiment: artifact_location='file:///D:/Project Pycharm/creditcard_approval_prediction/mlruns/1', creation_time=1690009378628, experiment_id='1', last_update_time=1690009378628, lifecycle_stage='active', name='test', tags={}>

In [None]:
# X_balanced, y_balanced = oversample.fit_resample(X_train, y_train)
# X_test_balanced, y_test_balanced = oversample.fit_resample(X_test, y_test)

In [25]:
train = xgb.DMatrix(X_balanced, label=y_train)
validation = xgb.DMatrix(X_test_balanced, label=y_val)

In [59]:
def objective(params):
    with mlflow.start_run():
        num_boost_round = 500
        early_stopping_rounds = 50
        
        mlflow.log_params(params)
        mlflow.log_param('num_boost_round', num_boost_round)
        mlflow.log_param('early_stopping_rounds', early_stopping_rounds)
        mlflow.log_param('train_data_name', 'green_tripdata_2021-01.parquet')
        mlflow.log_param('validation_data_name', 'green_tripdata_2021-02.parquet')
        mlflow.set_tag('model', 'xgboost')

        booster = xgb.train(
            params = params,
            dtrain = train,
            evals = [(validation, "validation")],
            num_boost_round = num_boost_round,
            early_stopping_rounds = early_stopping_rounds
        )
        
        y_pred = booster.predict(validation)
        rmse = mean_squared_error(y_test_balanced, y_pred, squared=False)
        mlflow.log_metric('rmse', rmse)
        return {'loss': rmse, 'status': STATUS_OK}

In [60]:
grid_search = {
    'max_depth': scope.int(hp.quniform('max_depth', 4, 100, 1)),
    'reg_alpha': hp.loguniform('reg_alpha', -5, -1),
    'reg_lambda': hp.loguniform('reg_lambda', -6, -1),
    'min_child': hp.loguniform('min_child', -1, 3),
    'seed': 111,
    'objective': 'reg:linear'
}

In [61]:
best_model = fmin(
    fn=objective,
    space=grid_search,
    algo=tpe.suggest,
    max_evals=30,
    trials=Trials()
)

Parameters: { "min_child" } are not used.

[0]	validation-rmse:0.42838                                                                                            
[1]	validation-rmse:0.40007                                                                                            
[2]	validation-rmse:0.39268                                                                                            
[3]	validation-rmse:0.39489                                                                                            
[4]	validation-rmse:0.39943                                                                                            
[5]	validation-rmse:0.40446                                                                                            
[6]	validation-rmse:0.40842                                                                                            
[7]	validation-rmse:0.41216                                                                                          




Parameters: { "min_child" } are not used.

[0]	validation-rmse:0.43008                                                                                            
[1]	validation-rmse:0.39860                                                                                            
[2]	validation-rmse:0.38280                                                                                            
[3]	validation-rmse:0.37601                                                                                            
[4]	validation-rmse:0.37505                                                                                            
[5]	validation-rmse:0.37482                                                                                            
[6]	validation-rmse:0.37685                                                                                            
[7]	validation-rmse:0.37872                                                                                          




Parameters: { "min_child" } are not used.

[0]	validation-rmse:0.42822                                                                                            
[1]	validation-rmse:0.39970                                                                                            
[2]	validation-rmse:0.39261                                                                                            
[3]	validation-rmse:0.39387                                                                                            
[4]	validation-rmse:0.39921                                                                                            
[5]	validation-rmse:0.40389                                                                                            
[6]	validation-rmse:0.40801                                                                                            
[7]	validation-rmse:0.41086                                                                                          




Parameters: { "min_child" } are not used.

[0]	validation-rmse:0.42970                                                                                            
[1]	validation-rmse:0.39832                                                                                            
[2]	validation-rmse:0.39020                                                                                            
[3]	validation-rmse:0.38875                                                                                            
[4]	validation-rmse:0.38835                                                                                            
[5]	validation-rmse:0.39028                                                                                            
[6]	validation-rmse:0.39152                                                                                            
[7]	validation-rmse:0.39337                                                                                          




Parameters: { "min_child" } are not used.

[0]	validation-rmse:0.42833                                                                                            
[1]	validation-rmse:0.39958                                                                                            
[2]	validation-rmse:0.39319                                                                                            
[3]	validation-rmse:0.39459                                                                                            
[4]	validation-rmse:0.39947                                                                                            
[5]	validation-rmse:0.40415                                                                                            
[6]	validation-rmse:0.40683                                                                                            
[7]	validation-rmse:0.40923                                                                                          




Parameters: { "min_child" } are not used.

[0]	validation-rmse:0.42939                                                                                            
[1]	validation-rmse:0.40073                                                                                            
[2]	validation-rmse:0.39658                                                                                            
[3]	validation-rmse:0.39935                                                                                            
[4]	validation-rmse:0.40462                                                                                            
[5]	validation-rmse:0.40964                                                                                            
[6]	validation-rmse:0.41397                                                                                            
[7]	validation-rmse:0.41699                                                                                          




Parameters: { "min_child" } are not used.

[0]	validation-rmse:0.42779                                                                                            
[1]	validation-rmse:0.39900                                                                                            
[2]	validation-rmse:0.39299                                                                                            
[3]	validation-rmse:0.39472                                                                                            
[4]	validation-rmse:0.39916                                                                                            
[5]	validation-rmse:0.40165                                                                                            
[6]	validation-rmse:0.40371                                                                                            
[7]	validation-rmse:0.40634                                                                                          




Parameters: { "min_child" } are not used.

[0]	validation-rmse:0.45885                                                                                            
[1]	validation-rmse:0.42926                                                                                            
[2]	validation-rmse:0.40878                                                                                            
[3]	validation-rmse:0.39162                                                                                            
[4]	validation-rmse:0.37133                                                                                            
[5]	validation-rmse:0.36529                                                                                            
[6]	validation-rmse:0.35387                                                                                            
[7]	validation-rmse:0.34687                                                                                          

[64]	validation-rmse:0.28820                                                                                           
[65]	validation-rmse:0.28708                                                                                           
[66]	validation-rmse:0.28679                                                                                           
[67]	validation-rmse:0.28686                                                                                           
[68]	validation-rmse:0.28644                                                                                           
[69]	validation-rmse:0.28545                                                                                           
[70]	validation-rmse:0.28487                                                                                           
[71]	validation-rmse:0.28472                                                                                           
[72]	validation-rmse:0.28470            

[132]	validation-rmse:0.27268                                                                                          
[133]	validation-rmse:0.27268                                                                                          
[134]	validation-rmse:0.27263                                                                                          
[135]	validation-rmse:0.27261                                                                                          
[136]	validation-rmse:0.27222                                                                                          
[137]	validation-rmse:0.27225                                                                                          
[138]	validation-rmse:0.27222                                                                                          
[139]	validation-rmse:0.27182                                                                                          
[140]	validation-rmse:0.27153           

[200]	validation-rmse:0.26544                                                                                          
[201]	validation-rmse:0.26538                                                                                          
[202]	validation-rmse:0.26527                                                                                          
[203]	validation-rmse:0.26515                                                                                          
[204]	validation-rmse:0.26496                                                                                          
[205]	validation-rmse:0.26491                                                                                          
[206]	validation-rmse:0.26485                                                                                          
[207]	validation-rmse:0.26466                                                                                          
[208]	validation-rmse:0.26473           

[268]	validation-rmse:0.26185                                                                                          
[269]	validation-rmse:0.26186                                                                                          
[270]	validation-rmse:0.26165                                                                                          
[271]	validation-rmse:0.26158                                                                                          
[272]	validation-rmse:0.26145                                                                                          
[273]	validation-rmse:0.26136                                                                                          
[274]	validation-rmse:0.26137                                                                                          
[275]	validation-rmse:0.26127                                                                                          
[276]	validation-rmse:0.26124           

[336]	validation-rmse:0.25985                                                                                          
[337]	validation-rmse:0.25981                                                                                          
[338]	validation-rmse:0.25988                                                                                          
[339]	validation-rmse:0.25986                                                                                          
[340]	validation-rmse:0.25975                                                                                          
[341]	validation-rmse:0.25963                                                                                          
[342]	validation-rmse:0.25970                                                                                          
[343]	validation-rmse:0.25979                                                                                          
[344]	validation-rmse:0.25980           

[404]	validation-rmse:0.25840                                                                                          
[405]	validation-rmse:0.25834                                                                                          
[406]	validation-rmse:0.25828                                                                                          
[407]	validation-rmse:0.25834                                                                                          
[408]	validation-rmse:0.25824                                                                                          
[409]	validation-rmse:0.25823                                                                                          
[410]	validation-rmse:0.25824                                                                                          
[411]	validation-rmse:0.25824                                                                                          
[412]	validation-rmse:0.25821           

[472]	validation-rmse:0.25693                                                                                          
[473]	validation-rmse:0.25688                                                                                          
[474]	validation-rmse:0.25695                                                                                          
[475]	validation-rmse:0.25699                                                                                          
[476]	validation-rmse:0.25700                                                                                          
[477]	validation-rmse:0.25700                                                                                          
[478]	validation-rmse:0.25698                                                                                          
[479]	validation-rmse:0.25696                                                                                          
[480]	validation-rmse:0.25695           




Parameters: { "min_child" } are not used.

[0]	validation-rmse:0.42832                                                                                            
[1]	validation-rmse:0.39989                                                                                            
[2]	validation-rmse:0.39321                                                                                            
[3]	validation-rmse:0.39420                                                                                            
[4]	validation-rmse:0.39929                                                                                            
[5]	validation-rmse:0.40396                                                                                            
[6]	validation-rmse:0.40705                                                                                            
[7]	validation-rmse:0.40850                                                                                          




Parameters: { "min_child" } are not used.

[0]	validation-rmse:0.42831                                                                                            
[1]	validation-rmse:0.39956                                                                                            
[2]	validation-rmse:0.39323                                                                                            
[3]	validation-rmse:0.39446                                                                                            
[4]	validation-rmse:0.39937                                                                                            
[5]	validation-rmse:0.40398                                                                                            
[6]	validation-rmse:0.40667                                                                                            
[7]	validation-rmse:0.40918                                                                                          




Parameters: { "min_child" } are not used.

[0]	validation-rmse:0.42939                                                                                            
[1]	validation-rmse:0.39968                                                                                            
[2]	validation-rmse:0.39166                                                                                            
[3]	validation-rmse:0.39074                                                                                            
[4]	validation-rmse:0.39373                                                                                            
[5]	validation-rmse:0.39460                                                                                            
[6]	validation-rmse:0.39661                                                                                            
[7]	validation-rmse:0.39722                                                                                          




Parameters: { "min_child" } are not used.

[0]	validation-rmse:0.42903                                                                                            
[1]	validation-rmse:0.40603                                                                                            
[2]	validation-rmse:0.40480                                                                                            
[3]	validation-rmse:0.41165                                                                                            
[4]	validation-rmse:0.41683                                                                                            
[5]	validation-rmse:0.42189                                                                                            
[6]	validation-rmse:0.42645                                                                                            
[7]	validation-rmse:0.42989                                                                                          




Parameters: { "min_child" } are not used.

[0]	validation-rmse:0.42976                                                                                            
[1]	validation-rmse:0.40666                                                                                            
[2]	validation-rmse:0.40058                                                                                            
[3]	validation-rmse:0.40327                                                                                            
[4]	validation-rmse:0.40809                                                                                            
[5]	validation-rmse:0.41333                                                                                            
[6]	validation-rmse:0.41697                                                                                            
[7]	validation-rmse:0.42012                                                                                          




Parameters: { "min_child" } are not used.

[0]	validation-rmse:0.42857                                                                                            
[1]	validation-rmse:0.40034                                                                                            
[2]	validation-rmse:0.39463                                                                                            
[3]	validation-rmse:0.39752                                                                                            
[4]	validation-rmse:0.40212                                                                                            
[5]	validation-rmse:0.40747                                                                                            
[6]	validation-rmse:0.41118                                                                                            
[7]	validation-rmse:0.41437                                                                                          




Parameters: { "min_child" } are not used.

[0]	validation-rmse:0.42938                                                                                            
[1]	validation-rmse:0.40550                                                                                            
[2]	validation-rmse:0.40423                                                                                            
[3]	validation-rmse:0.40677                                                                                            
[4]	validation-rmse:0.41241                                                                                            
[5]	validation-rmse:0.41812                                                                                            
[6]	validation-rmse:0.42301                                                                                            
[7]	validation-rmse:0.42689                                                                                          




Parameters: { "min_child" } are not used.

[0]	validation-rmse:0.42784                                                                                            
[1]	validation-rmse:0.39887                                                                                            
[2]	validation-rmse:0.39147                                                                                            
[3]	validation-rmse:0.39334                                                                                            
[4]	validation-rmse:0.39856                                                                                            
[5]	validation-rmse:0.40300                                                                                            
[6]	validation-rmse:0.40725                                                                                            
[7]	validation-rmse:0.40886                                                                                          




Parameters: { "min_child" } are not used.

[0]	validation-rmse:0.42840                                                                                            
[1]	validation-rmse:0.40002                                                                                            
[2]	validation-rmse:0.39168                                                                                            
[3]	validation-rmse:0.39252                                                                                            
[4]	validation-rmse:0.39646                                                                                            
[5]	validation-rmse:0.39958                                                                                            
[6]	validation-rmse:0.40252                                                                                            
[7]	validation-rmse:0.40328                                                                                          




Parameters: { "min_child" } are not used.

[0]	validation-rmse:0.42939                                                                                            
[1]	validation-rmse:0.40587                                                                                            
[2]	validation-rmse:0.40003                                                                                            
[3]	validation-rmse:0.40300                                                                                            
[4]	validation-rmse:0.40928                                                                                            
[5]	validation-rmse:0.41458                                                                                            
[6]	validation-rmse:0.41889                                                                                            
[7]	validation-rmse:0.42204                                                                                          




Parameters: { "min_child" } are not used.

[0]	validation-rmse:0.42824                                                                                            
[1]	validation-rmse:0.39920                                                                                            
[2]	validation-rmse:0.39092                                                                                            
[3]	validation-rmse:0.39296                                                                                            
[4]	validation-rmse:0.39803                                                                                            
[5]	validation-rmse:0.40314                                                                                            
[6]	validation-rmse:0.40715                                                                                            
[7]	validation-rmse:0.41058                                                                                          




Parameters: { "min_child" } are not used.

[0]	validation-rmse:0.42880                                                                                            
[1]	validation-rmse:0.40055                                                                                            
[2]	validation-rmse:0.39279                                                                                            
[3]	validation-rmse:0.39334                                                                                            
[4]	validation-rmse:0.39829                                                                                            
[5]	validation-rmse:0.40145                                                                                            
[6]	validation-rmse:0.40293                                                                                            
[7]	validation-rmse:0.40437                                                                                          




Parameters: { "min_child" } are not used.

[0]	validation-rmse:0.45886                                                                                            
[1]	validation-rmse:0.42928                                                                                            
[2]	validation-rmse:0.40880                                                                                            
[3]	validation-rmse:0.39163                                                                                            
[4]	validation-rmse:0.37151                                                                                            
[5]	validation-rmse:0.36549                                                                                            
[6]	validation-rmse:0.35387                                                                                            
[7]	validation-rmse:0.34735                                                                                          

[64]	validation-rmse:0.29027                                                                                           
[65]	validation-rmse:0.29013                                                                                           
[66]	validation-rmse:0.29015                                                                                           
[67]	validation-rmse:0.28975                                                                                           
[68]	validation-rmse:0.28991                                                                                           
[69]	validation-rmse:0.28979                                                                                           
[70]	validation-rmse:0.28990                                                                                           
[71]	validation-rmse:0.28848                                                                                           
[72]	validation-rmse:0.28811            

[132]	validation-rmse:0.27501                                                                                          
[133]	validation-rmse:0.27486                                                                                          
[134]	validation-rmse:0.27473                                                                                          
[135]	validation-rmse:0.27448                                                                                          
[136]	validation-rmse:0.27428                                                                                          
[137]	validation-rmse:0.27388                                                                                          
[138]	validation-rmse:0.27370                                                                                          
[139]	validation-rmse:0.27370                                                                                          
[140]	validation-rmse:0.27330           

[200]	validation-rmse:0.26920                                                                                          
[201]	validation-rmse:0.26914                                                                                          
[202]	validation-rmse:0.26941                                                                                          
[203]	validation-rmse:0.26941                                                                                          
[204]	validation-rmse:0.26940                                                                                          
[205]	validation-rmse:0.26931                                                                                          
[206]	validation-rmse:0.26925                                                                                          
[207]	validation-rmse:0.26921                                                                                          
[208]	validation-rmse:0.26923           

[268]	validation-rmse:0.26707                                                                                          
[269]	validation-rmse:0.26694                                                                                          
[270]	validation-rmse:0.26689                                                                                          
[271]	validation-rmse:0.26692                                                                                          
[272]	validation-rmse:0.26677                                                                                          
[273]	validation-rmse:0.26669                                                                                          
[274]	validation-rmse:0.26665                                                                                          
[275]	validation-rmse:0.26654                                                                                          
[276]	validation-rmse:0.26646           

[336]	validation-rmse:0.26513                                                                                          
[337]	validation-rmse:0.26506                                                                                          
[338]	validation-rmse:0.26519                                                                                          
[339]	validation-rmse:0.26519                                                                                          
[340]	validation-rmse:0.26526                                                                                          
[341]	validation-rmse:0.26523                                                                                          
[342]	validation-rmse:0.26536                                                                                          
[343]	validation-rmse:0.26542                                                                                          
[344]	validation-rmse:0.26539           

[404]	validation-rmse:0.26460                                                                                          
[405]	validation-rmse:0.26462                                                                                          
[406]	validation-rmse:0.26461                                                                                          
[407]	validation-rmse:0.26462                                                                                          
[408]	validation-rmse:0.26459                                                                                          
[409]	validation-rmse:0.26472                                                                                          
[410]	validation-rmse:0.26469                                                                                          
[411]	validation-rmse:0.26468                                                                                          
[412]	validation-rmse:0.26466           

[472]	validation-rmse:0.26373                                                                                          
[473]	validation-rmse:0.26372                                                                                          
[474]	validation-rmse:0.26370                                                                                          
[475]	validation-rmse:0.26371                                                                                          
[476]	validation-rmse:0.26371                                                                                          
[477]	validation-rmse:0.26370                                                                                          
[478]	validation-rmse:0.26369                                                                                          
[479]	validation-rmse:0.26368                                                                                          
[480]	validation-rmse:0.26366           




Parameters: { "min_child" } are not used.

[0]	validation-rmse:0.45137                                                                                            
[1]	validation-rmse:0.42299                                                                                            
[2]	validation-rmse:0.40111                                                                                            
[3]	validation-rmse:0.38278                                                                                            
[4]	validation-rmse:0.36879                                                                                            
[5]	validation-rmse:0.36158                                                                                            
[6]	validation-rmse:0.35095                                                                                            
[7]	validation-rmse:0.34687                                                                                          

[64]	validation-rmse:0.29461                                                                                           
[65]	validation-rmse:0.29424                                                                                           
[66]	validation-rmse:0.29411                                                                                           
[67]	validation-rmse:0.29336                                                                                           
[68]	validation-rmse:0.29303                                                                                           
[69]	validation-rmse:0.29259                                                                                           
[70]	validation-rmse:0.29220                                                                                           
[71]	validation-rmse:0.29230                                                                                           
[72]	validation-rmse:0.29224            

[132]	validation-rmse:0.28275                                                                                          
[133]	validation-rmse:0.28253                                                                                          
[134]	validation-rmse:0.28228                                                                                          
[135]	validation-rmse:0.28206                                                                                          
[136]	validation-rmse:0.28204                                                                                          
[137]	validation-rmse:0.28203                                                                                          
[138]	validation-rmse:0.28198                                                                                          
[139]	validation-rmse:0.28189                                                                                          
[140]	validation-rmse:0.28172           

[200]	validation-rmse:0.27898                                                                                          
[201]	validation-rmse:0.27895                                                                                          
[202]	validation-rmse:0.27887                                                                                          
[203]	validation-rmse:0.27880                                                                                          
[204]	validation-rmse:0.27868                                                                                          
[205]	validation-rmse:0.27875                                                                                          
[206]	validation-rmse:0.27868                                                                                          
[207]	validation-rmse:0.27875                                                                                          
[208]	validation-rmse:0.27878           

[268]	validation-rmse:0.27773                                                                                          
[269]	validation-rmse:0.27773                                                                                          
[270]	validation-rmse:0.27771                                                                                          
[271]	validation-rmse:0.27771                                                                                          
[272]	validation-rmse:0.27768                                                                                          
[273]	validation-rmse:0.27770                                                                                          
[274]	validation-rmse:0.27768                                                                                          
[275]	validation-rmse:0.27770                                                                                          
[276]	validation-rmse:0.27768           

[336]	validation-rmse:0.27766                                                                                          
[337]	validation-rmse:0.27767                                                                                          
[338]	validation-rmse:0.27769                                                                                          
[339]	validation-rmse:0.27764                                                                                          
[340]	validation-rmse:0.27765                                                                                          
[341]	validation-rmse:0.27764                                                                                          
[342]	validation-rmse:0.27759                                                                                          
[343]	validation-rmse:0.27757                                                                                          
[344]	validation-rmse:0.27754           

[404]	validation-rmse:0.27742                                                                                          
[405]	validation-rmse:0.27741                                                                                          
[406]	validation-rmse:0.27739                                                                                          
[407]	validation-rmse:0.27741                                                                                          
[408]	validation-rmse:0.27743                                                                                          
[409]	validation-rmse:0.27741                                                                                          
[410]	validation-rmse:0.27745                                                                                          
[411]	validation-rmse:0.27751                                                                                          
[412]	validation-rmse:0.27747           




Parameters: { "min_child" } are not used.

[0]	validation-rmse:0.46676                                                                                            
[1]	validation-rmse:0.43885                                                                                            
[2]	validation-rmse:0.41891                                                                                            
[3]	validation-rmse:0.39882                                                                                            
[4]	validation-rmse:0.38589                                                                                            
[5]	validation-rmse:0.37299                                                                                            
[6]	validation-rmse:0.36444                                                                                            
[7]	validation-rmse:0.35062                                                                                          

[64]	validation-rmse:0.29299                                                                                           
[65]	validation-rmse:0.29294                                                                                           
[66]	validation-rmse:0.29310                                                                                           
[67]	validation-rmse:0.29213                                                                                           
[68]	validation-rmse:0.29171                                                                                           
[69]	validation-rmse:0.29174                                                                                           
[70]	validation-rmse:0.29140                                                                                           
[71]	validation-rmse:0.29165                                                                                           
[72]	validation-rmse:0.29095            

[132]	validation-rmse:0.27550                                                                                          
[133]	validation-rmse:0.27526                                                                                          
[134]	validation-rmse:0.27486                                                                                          
[135]	validation-rmse:0.27488                                                                                          
[136]	validation-rmse:0.27444                                                                                          
[137]	validation-rmse:0.27372                                                                                          
[138]	validation-rmse:0.27347                                                                                          
[139]	validation-rmse:0.27380                                                                                          
[140]	validation-rmse:0.27408           

[200]	validation-rmse:0.26314                                                                                          
[201]	validation-rmse:0.26352                                                                                          
[202]	validation-rmse:0.26326                                                                                          
[203]	validation-rmse:0.26321                                                                                          
[204]	validation-rmse:0.26334                                                                                          
[205]	validation-rmse:0.26322                                                                                          
[206]	validation-rmse:0.26303                                                                                          
[207]	validation-rmse:0.26296                                                                                          
[208]	validation-rmse:0.26269           

[268]	validation-rmse:0.25635                                                                                          
[269]	validation-rmse:0.25614                                                                                          
[270]	validation-rmse:0.25606                                                                                          
[271]	validation-rmse:0.25585                                                                                          
[272]	validation-rmse:0.25569                                                                                          
[273]	validation-rmse:0.25556                                                                                          
[274]	validation-rmse:0.25547                                                                                          
[275]	validation-rmse:0.25529                                                                                          
[276]	validation-rmse:0.25529           

[336]	validation-rmse:0.25091                                                                                          
[337]	validation-rmse:0.25086                                                                                          
[338]	validation-rmse:0.25071                                                                                          
[339]	validation-rmse:0.25071                                                                                          
[340]	validation-rmse:0.25073                                                                                          
[341]	validation-rmse:0.25071                                                                                          
[342]	validation-rmse:0.25060                                                                                          
[343]	validation-rmse:0.25053                                                                                          
[344]	validation-rmse:0.25028           

[404]	validation-rmse:0.24796                                                                                          
[405]	validation-rmse:0.24791                                                                                          
[406]	validation-rmse:0.24787                                                                                          
[407]	validation-rmse:0.24786                                                                                          
[408]	validation-rmse:0.24784                                                                                          
[409]	validation-rmse:0.24783                                                                                          
[410]	validation-rmse:0.24779                                                                                          
[411]	validation-rmse:0.24781                                                                                          
[412]	validation-rmse:0.24776           

[472]	validation-rmse:0.24570                                                                                          
[473]	validation-rmse:0.24570                                                                                          
[474]	validation-rmse:0.24566                                                                                          
[475]	validation-rmse:0.24561                                                                                          
[476]	validation-rmse:0.24548                                                                                          
[477]	validation-rmse:0.24545                                                                                          
[478]	validation-rmse:0.24541                                                                                          
[479]	validation-rmse:0.24542                                                                                          
[480]	validation-rmse:0.24544           




Parameters: { "min_child" } are not used.

[0]	validation-rmse:0.43012                                                                                            
[1]	validation-rmse:0.40084                                                                                            
[2]	validation-rmse:0.39057                                                                                            
[3]	validation-rmse:0.38856                                                                                            
[4]	validation-rmse:0.38885                                                                                            
[5]	validation-rmse:0.39216                                                                                            
[6]	validation-rmse:0.39555                                                                                            
[7]	validation-rmse:0.39790                                                                                          




Parameters: { "min_child" } are not used.

[0]	validation-rmse:0.43114                                                                                            
[1]	validation-rmse:0.40212                                                                                            
[2]	validation-rmse:0.39106                                                                                            
[3]	validation-rmse:0.39222                                                                                            
[4]	validation-rmse:0.39283                                                                                            
[5]	validation-rmse:0.39334                                                                                            
[6]	validation-rmse:0.39687                                                                                            
[7]	validation-rmse:0.39757                                                                                          




Parameters: { "min_child" } are not used.

[0]	validation-rmse:0.47005                                                                                            
[1]	validation-rmse:0.44450                                                                                            
[2]	validation-rmse:0.42843                                                                                            
[3]	validation-rmse:0.40834                                                                                            
[4]	validation-rmse:0.39951                                                                                            
[5]	validation-rmse:0.39194                                                                                            
[6]	validation-rmse:0.37090                                                                                            
[7]	validation-rmse:0.35860                                                                                          

[64]	validation-rmse:0.30540                                                                                           
[65]	validation-rmse:0.30467                                                                                           
[66]	validation-rmse:0.30382                                                                                           
[67]	validation-rmse:0.30384                                                                                           
[68]	validation-rmse:0.30357                                                                                           
[69]	validation-rmse:0.30219                                                                                           
[70]	validation-rmse:0.30149                                                                                           
[71]	validation-rmse:0.30111                                                                                           
[72]	validation-rmse:0.30115            

[132]	validation-rmse:0.28524                                                                                          
[133]	validation-rmse:0.28520                                                                                          
[134]	validation-rmse:0.28479                                                                                          
[135]	validation-rmse:0.28469                                                                                          
[136]	validation-rmse:0.28444                                                                                          
[137]	validation-rmse:0.28417                                                                                          
[138]	validation-rmse:0.28419                                                                                          
[139]	validation-rmse:0.28413                                                                                          
[140]	validation-rmse:0.28409           

[200]	validation-rmse:0.27195                                                                                          
[201]	validation-rmse:0.27163                                                                                          
[202]	validation-rmse:0.27115                                                                                          
[203]	validation-rmse:0.27123                                                                                          
[204]	validation-rmse:0.27117                                                                                          
[205]	validation-rmse:0.27120                                                                                          
[206]	validation-rmse:0.27063                                                                                          
[207]	validation-rmse:0.27062                                                                                          
[208]	validation-rmse:0.27014           

[268]	validation-rmse:0.26105                                                                                          
[269]	validation-rmse:0.26101                                                                                          
[270]	validation-rmse:0.26098                                                                                          
[271]	validation-rmse:0.26113                                                                                          
[272]	validation-rmse:0.26080                                                                                          
[273]	validation-rmse:0.26078                                                                                          
[274]	validation-rmse:0.26081                                                                                          
[275]	validation-rmse:0.26069                                                                                          
[276]	validation-rmse:0.26064           

[336]	validation-rmse:0.25508                                                                                          
[337]	validation-rmse:0.25487                                                                                          
[338]	validation-rmse:0.25457                                                                                          
[339]	validation-rmse:0.25446                                                                                          
[340]	validation-rmse:0.25407                                                                                          
[341]	validation-rmse:0.25400                                                                                          
[342]	validation-rmse:0.25381                                                                                          
[343]	validation-rmse:0.25359                                                                                          
[344]	validation-rmse:0.25370           

[404]	validation-rmse:0.24887                                                                                          
[405]	validation-rmse:0.24889                                                                                          
[406]	validation-rmse:0.24896                                                                                          
[407]	validation-rmse:0.24897                                                                                          
[408]	validation-rmse:0.24885                                                                                          
[409]	validation-rmse:0.24886                                                                                          
[410]	validation-rmse:0.24885                                                                                          
[411]	validation-rmse:0.24871                                                                                          
[412]	validation-rmse:0.24846           

[472]	validation-rmse:0.24424                                                                                          
[473]	validation-rmse:0.24408                                                                                          
[474]	validation-rmse:0.24402                                                                                          
[475]	validation-rmse:0.24402                                                                                          
[476]	validation-rmse:0.24393                                                                                          
[477]	validation-rmse:0.24383                                                                                          
[478]	validation-rmse:0.24386                                                                                          
[479]	validation-rmse:0.24376                                                                                          
[480]	validation-rmse:0.24374           




Parameters: { "min_child" } are not used.

[0]	validation-rmse:0.42865                                                                                            
[1]	validation-rmse:0.40025                                                                                            
[2]	validation-rmse:0.39594                                                                                            
[3]	validation-rmse:0.39833                                                                                            
[4]	validation-rmse:0.40304                                                                                            
[5]	validation-rmse:0.40797                                                                                            
[6]	validation-rmse:0.41206                                                                                            
[7]	validation-rmse:0.41538                                                                                          




Parameters: { "min_child" } are not used.

[0]	validation-rmse:0.42941                                                                                            
[1]	validation-rmse:0.40063                                                                                            
[2]	validation-rmse:0.39446                                                                                            
[3]	validation-rmse:0.39761                                                                                            
[4]	validation-rmse:0.40251                                                                                            
[5]	validation-rmse:0.40709                                                                                            
[6]	validation-rmse:0.41163                                                                                            
[7]	validation-rmse:0.41504                                                                                          




Parameters: { "min_child" } are not used.

[0]	validation-rmse:0.47005                                                                                            
[1]	validation-rmse:0.44450                                                                                            
[2]	validation-rmse:0.42843                                                                                            
[3]	validation-rmse:0.40834                                                                                            
[4]	validation-rmse:0.39950                                                                                            
[5]	validation-rmse:0.39194                                                                                            
[6]	validation-rmse:0.37090                                                                                            
[7]	validation-rmse:0.35859                                                                                          

[64]	validation-rmse:0.30531                                                                                           
[65]	validation-rmse:0.30550                                                                                           
[66]	validation-rmse:0.30529                                                                                           
[67]	validation-rmse:0.30490                                                                                           
[68]	validation-rmse:0.30532                                                                                           
[69]	validation-rmse:0.30551                                                                                           
[70]	validation-rmse:0.30523                                                                                           
[71]	validation-rmse:0.30474                                                                                           
[72]	validation-rmse:0.30400            

[132]	validation-rmse:0.28499                                                                                          
[133]	validation-rmse:0.28478                                                                                          
[134]	validation-rmse:0.28477                                                                                          
[135]	validation-rmse:0.28416                                                                                          
[136]	validation-rmse:0.28414                                                                                          
[137]	validation-rmse:0.28353                                                                                          
[138]	validation-rmse:0.28332                                                                                          
[139]	validation-rmse:0.28265                                                                                          
[140]	validation-rmse:0.28170           

[200]	validation-rmse:0.26886                                                                                          
[201]	validation-rmse:0.26870                                                                                          
[202]	validation-rmse:0.26862                                                                                          
[203]	validation-rmse:0.26862                                                                                          
[204]	validation-rmse:0.26856                                                                                          
[205]	validation-rmse:0.26840                                                                                          
[206]	validation-rmse:0.26842                                                                                          
[207]	validation-rmse:0.26841                                                                                          
[208]	validation-rmse:0.26837           

[268]	validation-rmse:0.26014                                                                                          
[269]	validation-rmse:0.26013                                                                                          
[270]	validation-rmse:0.26009                                                                                          
[271]	validation-rmse:0.26012                                                                                          
[272]	validation-rmse:0.26001                                                                                          
[273]	validation-rmse:0.26001                                                                                          
[274]	validation-rmse:0.25997                                                                                          
[275]	validation-rmse:0.25987                                                                                          
[276]	validation-rmse:0.25990           

[336]	validation-rmse:0.25274                                                                                          
[337]	validation-rmse:0.25265                                                                                          
[338]	validation-rmse:0.25265                                                                                          
[339]	validation-rmse:0.25241                                                                                          
[340]	validation-rmse:0.25246                                                                                          
[341]	validation-rmse:0.25237                                                                                          
[342]	validation-rmse:0.25231                                                                                          
[343]	validation-rmse:0.25211                                                                                          
[344]	validation-rmse:0.25201           

[404]	validation-rmse:0.24717                                                                                          
[405]	validation-rmse:0.24718                                                                                          
[406]	validation-rmse:0.24699                                                                                          
[407]	validation-rmse:0.24697                                                                                          
[408]	validation-rmse:0.24680                                                                                          
[409]	validation-rmse:0.24675                                                                                          
[410]	validation-rmse:0.24673                                                                                          
[411]	validation-rmse:0.24671                                                                                          
[412]	validation-rmse:0.24654           

[472]	validation-rmse:0.24378                                                                                          
[473]	validation-rmse:0.24374                                                                                          
[474]	validation-rmse:0.24371                                                                                          
[475]	validation-rmse:0.24393                                                                                          
[476]	validation-rmse:0.24376                                                                                          
[477]	validation-rmse:0.24374                                                                                          
[478]	validation-rmse:0.24376                                                                                          
[479]	validation-rmse:0.24365                                                                                          
[480]	validation-rmse:0.24356           




Parameters: { "min_child" } are not used.

[0]	validation-rmse:0.42855                                                                                            
[1]	validation-rmse:0.40002                                                                                            
[2]	validation-rmse:0.39499                                                                                            
[3]	validation-rmse:0.39632                                                                                            
[4]	validation-rmse:0.40121                                                                                            
[5]	validation-rmse:0.40653                                                                                            
[6]	validation-rmse:0.41072                                                                                            
[7]	validation-rmse:0.41406                                                                                          




100%|████████████████████████████████████████████████| 30/30 [07:35<00:00, 15.18s/trial, best loss: 0.2427683651738808]


## Train the Best Model

In [62]:
import xgboost as xgb

from hyperopt import fmin, tpe, hp, STATUS_OK, Trials
from hyperopt.pyll import scope

In [63]:
mlflow.set_tracking_uri("sqlite:///mlflow.db")
mlflow.set_experiment("test")

<Experiment: artifact_location='file:///D:/Project Pycharm/creditcard_approval_prediction/mlruns/1', creation_time=1690009378628, experiment_id='1', last_update_time=1690009378628, lifecycle_stage='active', name='test', tags={}>

In [None]:
#We took the best params from the MLflow interface and copien them here

best_params = {
     
}

mlflow.xgboost.autolog()

booster = xgb.train(
    params = best_params,
    dtrain = train,
    evals = [(validation, "validation")],
    num_boost_round = 500,
    early_stopping_rounds = 50,
)

In [None]:
key="???"
if best_params.get(key):
    print(best_params.get(key))
else:
    print("no value")

In [None]:
pd.DataFrame.from_dict([best_params])

In [None]:
y_pred = booster.predict(validation)

rmse = mean_squared_error(y_val, y_pred, squared=False)
rmse

## Model Logging 

In [None]:
with open('models/moodel.bin', 'wb') as f_out:
    pickle.dump(booster, f_out)

In [None]:
with open('preprocessing/process_dataframe.bin', 'wb') as f_out:
    pickle.dump(process_dataframe, f_out)

In [None]:
mlflow.set_experiment("test")
with mlflow.start_run():
    best_params = {

    }
    
    mlflow.log_params(best_params)
    mlflow.log_param('train_data_name', 'green_tripdata_2022-01.parquet')
    mlflow.log_param('validation_data_name', 'green_tripdata_2022-02.parquet')
    mlflow.set_tag('model', 'xgboost')
    
    booster = xgb.train(
    params = best_params,
    dtrain = train,
    evals = [(validation, "validation")],
    num_boost_round = 500,
    early_stopping_rounds = 50,
    )
    
    mlflow.xgboost.log_model(booster, artifact_path='mlflow_models')
    mlflow.log_artifact('preprocessing/process_dataframe.bin', artifact_path='preprocessing')
    

## Load Model

In [None]:
logged_model = 'runs:???'
loaded_model = mlflow.pyfunc.load_model(logged_model)

In [None]:
type(loaded_model)

In [None]:
y_preds = loaded_model.predict(X_val)

In [None]:
mean_squared_error(y_preds, y_val, squared=False)

In [None]:
y_preds

In [None]:
print(loaded_model.metadata.get_model_info())

## Sklearn Models

In [None]:
mlflow.sklearn.autolog()

for algorithm in (LinearSVR, RandomForestRegressor, GradientBoostingRegressor):
    with mlflow.start_run():
        mlflow.log_param('train_data_name', 'green_tripdata_2022-01.parquet')
        mlflow.log_param('validation_data_name', 'green_tripdata_2022-02.parquet')
        mlflow.log_artifact('preprocessing/process_dataframe.bin', artifact_path='preprocessing')
        model = algorithm()
        model.fit(X_train, y_train)
        
        preds = model.predict(X_val)
        rmse = mean_squared_error(preds, y_val, squared=False)
        mlflow.log_metric("rmse", rmse)
        

## MLflow Client

In [None]:
from mlflow.tracking import MlflowClient
from mlflow.entities import ViewType

In [None]:
MLFLOW_URI = "sqlite:///mlflow.db"

In [None]:
client = MlflowClient(MLFLOW_URI)

In [None]:
client.list_experiments()

In [None]:
client.create_experiment(name = 'new-experimet')

In [None]:
client.list_experiments()

In [None]:
runs = client.search_runs(
    experiment_ids='1',
    run_view_type=ViewType.ACTIVE_ONLY,
    filter_string='metrics.rmse < 7',
    max_results=5,
    order_by=["metrics.rmse ASC"],
)

In [None]:
for run in runs:
    print(f"run_id:{run.info.run_id}, metrics:{run.data.metrics['rmse']}")