In [11]:
import os

import pandas as pd
import numpy as np
import mlflow
import dagshub

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score



In [2]:
# Location of the raw loan-approval CSV.
# The default is the original author's local Windows path; set the
# LOAN_DATA_PATH environment variable to run the notebook on another machine
# without editing this cell.
DATA_PATH = os.environ.get(
    'LOAN_DATA_PATH',
    r'D:\MIT\dec 26\mlops-project\data\external\loan_approval_dataset.csv',
)

# Load the whole file into a DataFrame with pandas' default parsing options.
df = pd.read_csv(DATA_PATH)


In [3]:
# Preview the first five rows of the freshly loaded frame.
df.head(n=5)

Unnamed: 0,loan_id,no_of_dependents,education,self_employed,income_annum,loan_amount,loan_term,cibil_score,residential_assets_value,commercial_assets_value,luxury_assets_value,bank_asset_value,loan_status
0,1,2,Graduate,No,9600000,29900000,12,778,2400000,17600000,22700000,8000000,Approved
1,2,0,Not Graduate,Yes,4100000,12200000,8,417,2700000,2200000,8800000,3300000,Rejected
2,3,3,Graduate,No,9100000,29700000,20,506,7100000,4500000,33300000,12800000,Rejected
3,4,3,Graduate,No,8200000,30700000,8,467,18200000,3300000,23300000,7900000,Rejected
4,5,5,Not Graduate,Yes,9800000,24200000,20,382,12400000,8200000,29400000,5000000,Rejected


In [4]:
# Inspect the raw column labels. The displayed output shows that every label
# except 'loan_id' carries a leading space, which the next cell strips.
df.columns

Index(['loan_id', ' no_of_dependents', ' education', ' self_employed',
       ' income_annum', ' loan_amount', ' loan_term', ' cibil_score',
       ' residential_assets_value', ' commercial_assets_value',
       ' luxury_assets_value', ' bank_asset_value', ' loan_status'],
      dtype='object')

In [5]:
# Remove the leading/trailing whitespace from every column label so the
# columns can be addressed by their plain names (e.g. 'loan_status').
df.columns = [label.strip() for label in df.columns]

# Display the cleaned labels to confirm the result.
df.columns
    


Index(['loan_id', 'no_of_dependents', 'education', 'self_employed',
       'income_annum', 'loan_amount', 'loan_term', 'cibil_score',
       'residential_assets_value', 'commercial_assets_value',
       'luxury_assets_value', 'bank_asset_value', 'loan_status'],
      dtype='object')

In [6]:
# Features: every column except the row identifier and the target.
x = df.drop(columns=['loan_id', 'loan_status'])

# Target: the loan decision column.
y = df['loan_status']


In [8]:
# One-hot encode the categorical feature columns; numeric columns pass
# through unchanged. The displayed output shows 'education' and
# 'self_employed' expanded into boolean indicator columns.
x = pd.get_dummies(data=x)

# Preview the encoded feature matrix.
x.head(n=5)


Unnamed: 0,no_of_dependents,income_annum,loan_amount,loan_term,cibil_score,residential_assets_value,commercial_assets_value,luxury_assets_value,bank_asset_value,education_ Graduate,education_ Not Graduate,self_employed_ No,self_employed_ Yes
0,2,9600000,29900000,12,778,2400000,17600000,22700000,8000000,True,False,True,False
1,0,4100000,12200000,8,417,2700000,2200000,8800000,3300000,False,True,False,True
2,3,9100000,29700000,20,506,7100000,4500000,33300000,12800000,True,False,True,False
3,3,8200000,30700000,8,467,18200000,3300000,23300000,7900000,True,False,True,False
4,5,9800000,24200000,20,382,12400000,8200000,29400000,5000000,False,True,False,True


In [12]:
# Standardize the encoded features with scikit-learn's StandardScaler.
# The scaler is fitted on all rows of `x`, so `x_scaled` reflects the
# statistics of the complete dataset.
scaler = StandardScaler()
scaler.fit(x)

x_scaled = scaler.transform(x)

In [13]:
# Split the *unscaled* features first, then standardize using statistics
# learned from the training partition only. Fitting the scaler on the full
# dataset before splitting would let test-set means/variances leak into the
# training features.
# random_state=42 and test_size=0.2 are unchanged, so the same rows land in
# each partition as before.
x_train_raw, x_test_raw, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=42
)

# Rebind `scaler` so the object kept in the namespace is the leak-free one.
scaler = StandardScaler()
x_train = scaler.fit_transform(x_train_raw)  # fit on training rows only
x_test = scaler.transform(x_test_raw)        # reuse the training statistics


In [19]:
# Point MLflow at the DagsHub-hosted tracking server for this repository.
tracking_uri = "https://dagshub.com/AdityaThakare72/mlops-project-1.mlflow"
mlflow.set_tracking_uri(tracking_uri)

# Initialise the DagsHub client for the same repository with its MLflow
# integration switched on.
dagshub.init(
    repo_owner='AdityaThakare72',
    repo_name='mlops-project-1',
    mlflow=True,
)

# Select the experiment that subsequent runs are recorded under; per the log
# output it is created when it does not exist yet. Kept as the last expression
# so the returned Experiment object is displayed.
mlflow.set_experiment(experiment_name='exp1_random_forest_std1')


2024/12/26 12:19:30 INFO mlflow.tracking.fluent: Experiment with name 'exp1_random_forest_std1' does not exist. Creating a new experiment.


<Experiment: artifact_location='mlflow-artifacts:/b71435cf937c4f739794a6b956689f9c', creation_time=1735195769957, experiment_id='2', last_update_time=1735195769957, lifecycle_stage='active', name='exp1_random_forest_std1', tags={}>

In [20]:
# Train a random forest on the prepared split and record parameters, metrics,
# the fitted model and this notebook in a single MLflow run.
with mlflow.start_run():

    # Fixed seed: an unseeded forest gives different trees, and therefore
    # different logged metrics, every time the cell is re-run.
    seed = 42

    mlflow.log_param('scaler', 'standard_scaler')
    mlflow.log_param('test_size', 0.2)
    mlflow.log_param('random_state', seed)

    model = RandomForestClassifier(random_state=seed)
    model.fit(x_train, y_train)

    y_pred = model.predict(x_test)

    # NOTE(review): the positive label carries a leading space, presumably
    # because the raw CSV values do (cf. the 'education_ Graduate' dummy
    # columns) -- confirm against the data if the labels are ever cleaned.
    positive_label = ' Approved'

    accuracy = accuracy_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred, pos_label=positive_label)
    precision = precision_score(y_test, y_pred, pos_label=positive_label)
    recall = recall_score(y_test, y_pred, pos_label=positive_label)

    # Log every metric under the same keys as before.
    for metric_name, metric_value in (
        ('accuracy', accuracy),
        ('f1_score', f1),
        ('precision', precision),
        ('recall', recall),
    ):
        mlflow.log_metric(metric_name, metric_value)

    # Persist the fitted estimator as a run artifact.
    mlflow.sklearn.log_model(model, 'random_forest_model')

    # Attach the notebook itself; the path is relative to the working
    # directory the kernel runs in.
    mlflow.log_artifact('exp1_initial.ipynb')




🏃 View run brawny-hog-630 at: https://dagshub.com/AdityaThakare72/mlops-project-1.mlflow/#/experiments/2/runs/371dbc28a2c84e3d8bf25f5acc0f109d
🧪 View experiment at: https://dagshub.com/AdityaThakare72/mlops-project-1.mlflow/#/experiments/2
