In [24]:
import pandas as pd
import optuna
import mlflow
import mlflow.xgboost
import xgboost as xgb
from sklearn.metrics import root_mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.impute import SimpleImputer
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.pipeline import Pipeline
from mlflow.tracking import MlflowClient

In [25]:

# Load the prepared dataset, derive the sale year and month from the sale
# date, then drop the columns that are not used as model features (including
# the raw sale date itself once year/month have been extracted).
df = (
    pd.read_csv("../dataset/output/completed_dataset.csv")
    .assign(Date_Sold=lambda frame: pd.to_datetime(frame["Date_Sold"]))
    .assign(
        Year_Sold=lambda frame: frame["Date_Sold"].dt.year,
        Month_Sold=lambda frame: frame["Date_Sold"].dt.month,
    )
    .drop(
        columns=[
            "Listing_ID",
            "Agency_Name",
            "Postcode",
            "Date_Sold",
            "Address",
            "Suburb",
            "Longitude",
            "Latitude",
            "Primary_School_Name",
            "Secondary_School_Name",
        ]
    )
)

df.head(2)

Unnamed: 0,Price,Property_Type,Bedrooms,Bathrooms,Parking_Spaces,Land_Size,Primary_School_Distance,Secondary_School_Distance,Distance_to_CBD,Distance_to_Coast,Secondary_ICSEA,Primary_ICSEA,Year_Sold,Month_Sold
0,880000,1,3,2,5,1533,1149,508,17721,26634.585679,1067,1043,2023,8
1,3850000,1,5,3,3,1532,418,418,9983,1539.284806,1197,1197,2016,6


In [26]:
def catag_feature():
    """
    Split the model input columns into categorical and numeric groups.

    Returns
    -------
    tuple of (list, list)
        ``(category_features, numeric_features)`` - column-name lists for the
        preprocessing pipeline. No column is currently treated as
        categorical, so the first list is empty.
    """
    category_features = []

    numeric_features = [
        "Property_Type",
        "Bedrooms",
        "Bathrooms",
        "Parking_Spaces",
        "Land_Size",
        "Primary_School_Distance",
        "Secondary_School_Distance",
        "Distance_to_CBD",
        "Distance_to_Coast",
        "Secondary_ICSEA",
        "Primary_ICSEA",
        "Year_Sold",
        "Month_Sold",
    ]

    return category_features, numeric_features

In [27]:
# Separate the regression target (y) from the predictor columns (X).
y = df["Price"]
X = df.drop(columns="Price")

# Hold out 25% of the rows for evaluation; the fixed random_state keeps the
# split identical between runs.
X_train_full, X_val, y_train_full, y_val = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=100,
)

# Turn on MLflow's automatic logging for XGBoost models.
mlflow.xgboost.autolog()

def objective(trial):
    """
    Optuna objective: fit one XGBoost pipeline and return its hold-out RMSE.

    Samples a hyperparameter set from ``trial``, fits an
    impute -> scale -> XGBRegressor pipeline on ``X_train_full`` /
    ``y_train_full`` inside a nested MLflow run, logs the fitted pipeline
    (artifact path ``"model"``), the sampled parameters and the ``rmse``
    metric to that run, and returns the RMSE measured on ``X_val`` / ``y_val``.

    Parameters
    ----------
    trial : optuna trial object
        Used to sample the hyperparameters.

    Returns
    -------
    float
        Root mean squared error on the hold-out set (lower is better).
    """
    # Keyword order matches the order in which values are requested from the trial.
    params = dict(
        n_estimators=trial.suggest_int("n_estimators", 100, 500),
        max_depth=trial.suggest_int("max_depth", 3, 10),
        learning_rate=trial.suggest_float("learning_rate", 0.01, 0.1),
        subsample=trial.suggest_float("subsample", 0.6, 1.0),
        colsample_bytree=trial.suggest_float("colsample_bytree", 0.6, 1.0),
        objective="reg:squarederror",
    )

    with mlflow.start_run(nested=True):
        # Only the numeric column list is used: catag_feature() currently
        # returns no categorical columns, so no encoder branch is built.
        _, numeric_columns = catag_feature()

        numeric_steps = [
            ("imputer", SimpleImputer(strategy="mean")),
            ("scaler", StandardScaler()),
        ]
        preprocessor = ColumnTransformer(
            transformers=[
                ("numerical", Pipeline(steps=numeric_steps), numeric_columns),
            ]
        )

        pipeline = Pipeline(
            [
                ("preprocessing", preprocessor),
                ("model", xgb.XGBRegressor(**params)),
            ]
        )

        # Train preprocessing and model together on the training split.
        pipeline.fit(X_train_full, y_train_full)

        # Store the whole fitted pipeline so a registered model includes preprocessing.
        mlflow.sklearn.log_model(pipeline, "model")

        # Score on the hold-out split.
        rmse = root_mean_squared_error(y_val, pipeline.predict(X_val))

        # Record what was tried and how well it did.
        mlflow.log_params(params)
        mlflow.log_metric("rmse", rmse)

        return rmse

# Point MLflow at the tracking server and select (or create) the experiment.
mlflow.set_tracking_uri(uri="http://localhost:8080/")
mlflow.set_experiment("housing-price-prediction")


# Parent run: each Optuna trial opens its own nested run inside objective().
with mlflow.start_run(run_name="optuna_hpo"):
    # Seed the sampler so that re-running the notebook proposes the same
    # sequence of trials. TPESampler is Optuna's default sampler, so only the
    # seed is new; 100 matches the random_state used for the data split.
    study = optuna.create_study(
        direction="minimize",
        sampler=optuna.samplers.TPESampler(seed=100),
    )
    study.optimize(objective, n_trials=50)

    best_params = study.best_trial.params
    best_rmse = study.best_value

    # Summarise the winning trial on the parent run.
    mlflow.log_params(best_params)
    mlflow.log_metric("best_rmse", best_rmse)

2025/08/03 18:31:43 INFO mlflow.tracking.fluent: Experiment with name 'housing-price-prediction' does not exist. Creating a new experiment.
[I 2025-08-03 18:31:44,403] A new study created in memory with name: no-name-75a81f72-bd4f-4abb-9a39-3cb0b4fc9840
[I 2025-08-03 18:31:57,371] Trial 0 finished with value: 249885.0953254958 and parameters: {'n_estimators': 371, 'max_depth': 4, 'learning_rate': 0.026600727745702518, 'subsample': 0.872293718206651, 'colsample_bytree': 0.6564895027611447}. Best is trial 0 with value: 249885.0953254958.


🏃 View run unique-chimp-831 at: http://localhost:8080/#/experiments/1/runs/aaec0205c608420288657d813a1f37bc
🧪 View experiment at: http://localhost:8080/#/experiments/1


[I 2025-08-03 18:32:06,365] Trial 1 finished with value: 228264.69896727204 and parameters: {'n_estimators': 298, 'max_depth': 6, 'learning_rate': 0.0323341875398618, 'subsample': 0.6263371859791584, 'colsample_bytree': 0.7505652650428476}. Best is trial 1 with value: 228264.69896727204.


🏃 View run rebellious-fly-930 at: http://localhost:8080/#/experiments/1/runs/b4697581ca594f84a6c711f54a151e71
🧪 View experiment at: http://localhost:8080/#/experiments/1


[I 2025-08-03 18:32:19,958] Trial 2 finished with value: 217754.90443137087 and parameters: {'n_estimators': 351, 'max_depth': 10, 'learning_rate': 0.03574244714305171, 'subsample': 0.8632565418126902, 'colsample_bytree': 0.8913206331716345}. Best is trial 2 with value: 217754.90443137087.


🏃 View run intrigued-whale-732 at: http://localhost:8080/#/experiments/1/runs/0a46de0028e14f6d9d6f13bdd574e71d
🧪 View experiment at: http://localhost:8080/#/experiments/1


[I 2025-08-03 18:32:34,436] Trial 3 finished with value: 216526.21385688032 and parameters: {'n_estimators': 496, 'max_depth': 10, 'learning_rate': 0.04727642478084092, 'subsample': 0.641502003859468, 'colsample_bytree': 0.9725848819725652}. Best is trial 3 with value: 216526.21385688032.


🏃 View run unequaled-mole-547 at: http://localhost:8080/#/experiments/1/runs/6b4b3a64c1c445899207ae47ad484dfe
🧪 View experiment at: http://localhost:8080/#/experiments/1


[I 2025-08-03 18:32:43,956] Trial 4 finished with value: 237678.56841414946 and parameters: {'n_estimators': 391, 'max_depth': 4, 'learning_rate': 0.043921334148652645, 'subsample': 0.7134271595582109, 'colsample_bytree': 0.6901375371190247}. Best is trial 3 with value: 216526.21385688032.


🏃 View run respected-boar-300 at: http://localhost:8080/#/experiments/1/runs/4a40205fb5544bad9779f04c5b053941
🧪 View experiment at: http://localhost:8080/#/experiments/1


[I 2025-08-03 18:32:53,581] Trial 5 finished with value: 255931.9067189444 and parameters: {'n_estimators': 457, 'max_depth': 3, 'learning_rate': 0.0321624600672066, 'subsample': 0.756663562870457, 'colsample_bytree': 0.8502428734526302}. Best is trial 3 with value: 216526.21385688032.


🏃 View run bustling-fly-511 at: http://localhost:8080/#/experiments/1/runs/8db712aad5be4e4998380baec5174d12
🧪 View experiment at: http://localhost:8080/#/experiments/1


[I 2025-08-03 18:33:02,302] Trial 6 finished with value: 266277.1527475978 and parameters: {'n_estimators': 176, 'max_depth': 3, 'learning_rate': 0.056357866962450764, 'subsample': 0.8224317527225947, 'colsample_bytree': 0.9120012922710343}. Best is trial 3 with value: 216526.21385688032.


🏃 View run resilient-jay-61 at: http://localhost:8080/#/experiments/1/runs/08fc166d639b4eff9881871f24a2ea04
🧪 View experiment at: http://localhost:8080/#/experiments/1


[I 2025-08-03 18:33:11,037] Trial 7 finished with value: 252985.24578066013 and parameters: {'n_estimators': 181, 'max_depth': 4, 'learning_rate': 0.04909747593739774, 'subsample': 0.6119541978111946, 'colsample_bytree': 0.6819700419213774}. Best is trial 3 with value: 216526.21385688032.


🏃 View run mysterious-sloth-462 at: http://localhost:8080/#/experiments/1/runs/7ed269d71784437690fd5926dab437f9
🧪 View experiment at: http://localhost:8080/#/experiments/1


[I 2025-08-03 18:33:21,263] Trial 8 finished with value: 231415.45867765567 and parameters: {'n_estimators': 303, 'max_depth': 4, 'learning_rate': 0.0850838711809661, 'subsample': 0.9572385989354375, 'colsample_bytree': 0.8962227052790903}. Best is trial 3 with value: 216526.21385688032.


🏃 View run nosy-skunk-320 at: http://localhost:8080/#/experiments/1/runs/65d0e30f632e40e3b8046e23decd42f4
🧪 View experiment at: http://localhost:8080/#/experiments/1


[I 2025-08-03 18:33:33,093] Trial 9 finished with value: 212928.3476557463 and parameters: {'n_estimators': 443, 'max_depth': 8, 'learning_rate': 0.04537046716983401, 'subsample': 0.6907613247323833, 'colsample_bytree': 0.7562324512085266}. Best is trial 9 with value: 212928.3476557463.


🏃 View run sneaky-ram-964 at: http://localhost:8080/#/experiments/1/runs/a433d0edda4c48b88ff65deaffee34ea
🧪 View experiment at: http://localhost:8080/#/experiments/1


[I 2025-08-03 18:33:42,888] Trial 10 finished with value: 214083.1609685908 and parameters: {'n_estimators': 239, 'max_depth': 8, 'learning_rate': 0.06949196616406003, 'subsample': 0.7079062142730729, 'colsample_bytree': 0.6006807457454473}. Best is trial 9 with value: 212928.3476557463.


🏃 View run intelligent-lark-323 at: http://localhost:8080/#/experiments/1/runs/d07fbe21d61c4d92b72c744c1d9fe9da
🧪 View experiment at: http://localhost:8080/#/experiments/1


[I 2025-08-03 18:33:51,796] Trial 11 finished with value: 222258.08675145856 and parameters: {'n_estimators': 103, 'max_depth': 8, 'learning_rate': 0.07552289123079146, 'subsample': 0.7131910596287758, 'colsample_bytree': 0.7703187310302129}. Best is trial 9 with value: 212928.3476557463.


🏃 View run kindly-cow-534 at: http://localhost:8080/#/experiments/1/runs/e35eabebece6447ea7f85269adf7924f
🧪 View experiment at: http://localhost:8080/#/experiments/1


[I 2025-08-03 18:34:01,141] Trial 12 finished with value: 247128.98113065292 and parameters: {'n_estimators': 240, 'max_depth': 8, 'learning_rate': 0.01206107471277975, 'subsample': 0.7017022174879632, 'colsample_bytree': 0.6080043494090425}. Best is trial 9 with value: 212928.3476557463.


🏃 View run defiant-kit-864 at: http://localhost:8080/#/experiments/1/runs/5a2762f84894418cba658631a58d1afd
🧪 View experiment at: http://localhost:8080/#/experiments/1


[I 2025-08-03 18:34:14,347] Trial 13 finished with value: 211702.05751462237 and parameters: {'n_estimators': 427, 'max_depth': 8, 'learning_rate': 0.0662399847199105, 'subsample': 0.7721253779324333, 'colsample_bytree': 0.8177057953204285}. Best is trial 13 with value: 211702.05751462237.


🏃 View run rare-skink-949 at: http://localhost:8080/#/experiments/1/runs/456f7bf7077d4177b439bf855a9723ca
🧪 View experiment at: http://localhost:8080/#/experiments/1


[I 2025-08-03 18:34:24,434] Trial 14 finished with value: 213084.590620302 and parameters: {'n_estimators': 430, 'max_depth': 7, 'learning_rate': 0.0953981532852129, 'subsample': 0.7881266336230766, 'colsample_bytree': 0.8085469412749436}. Best is trial 13 with value: 211702.05751462237.


🏃 View run salty-crow-220 at: http://localhost:8080/#/experiments/1/runs/7c8532eb803a46d988211f052398344e
🧪 View experiment at: http://localhost:8080/#/experiments/1


[I 2025-08-03 18:34:34,255] Trial 15 finished with value: 214905.07835586535 and parameters: {'n_estimators': 488, 'max_depth': 6, 'learning_rate': 0.061674915329731766, 'subsample': 0.9502157152059273, 'colsample_bytree': 0.7369640736161315}. Best is trial 13 with value: 211702.05751462237.


🏃 View run bedecked-tern-831 at: http://localhost:8080/#/experiments/1/runs/ef48de5534c54447990117546272f602
🧪 View experiment at: http://localhost:8080/#/experiments/1


[I 2025-08-03 18:34:45,965] Trial 16 finished with value: 213980.97188400425 and parameters: {'n_estimators': 425, 'max_depth': 9, 'learning_rate': 0.06460323371484958, 'subsample': 0.6647276167848715, 'colsample_bytree': 0.819433357466269}. Best is trial 13 with value: 211702.05751462237.


🏃 View run flawless-bass-743 at: http://localhost:8080/#/experiments/1/runs/26f465bf50e34d8094c0d262f2c02e7a
🧪 View experiment at: http://localhost:8080/#/experiments/1


[I 2025-08-03 18:34:55,448] Trial 17 finished with value: 213833.34518445577 and parameters: {'n_estimators': 331, 'max_depth': 7, 'learning_rate': 0.08568519949958357, 'subsample': 0.760717904112308, 'colsample_bytree': 0.8464197127023205}. Best is trial 13 with value: 211702.05751462237.


🏃 View run lyrical-fly-149 at: http://localhost:8080/#/experiments/1/runs/8013b553427448e3b3d7e343abb8ffd6
🧪 View experiment at: http://localhost:8080/#/experiments/1


[I 2025-08-03 18:35:07,578] Trial 18 finished with value: 217190.69318143488 and parameters: {'n_estimators': 410, 'max_depth': 9, 'learning_rate': 0.019114925981011888, 'subsample': 0.8385381454818589, 'colsample_bytree': 0.7260974128226054}. Best is trial 13 with value: 211702.05751462237.


🏃 View run flawless-ram-613 at: http://localhost:8080/#/experiments/1/runs/fc73f6270c02492e8a4415e184892d31
🧪 View experiment at: http://localhost:8080/#/experiments/1


[I 2025-08-03 18:35:20,703] Trial 19 finished with value: 215371.80186396692 and parameters: {'n_estimators': 458, 'max_depth': 9, 'learning_rate': 0.07278512896208922, 'subsample': 0.9113566458573011, 'colsample_bytree': 0.9973048453702744}. Best is trial 13 with value: 211702.05751462237.


🏃 View run likeable-gnat-592 at: http://localhost:8080/#/experiments/1/runs/3160ad2865f147e8a8340b7c95c4c54b
🧪 View experiment at: http://localhost:8080/#/experiments/1


[I 2025-08-03 18:35:31,365] Trial 20 finished with value: 213988.66400547366 and parameters: {'n_estimators': 462, 'max_depth': 7, 'learning_rate': 0.04277131015433683, 'subsample': 0.7638413041617065, 'colsample_bytree': 0.7819217089369641}. Best is trial 13 with value: 211702.05751462237.


🏃 View run unleashed-zebra-266 at: http://localhost:8080/#/experiments/1/runs/624018863a424a438a78cb7942b57002
🧪 View experiment at: http://localhost:8080/#/experiments/1


[I 2025-08-03 18:35:42,239] Trial 21 finished with value: 212315.75542310733 and parameters: {'n_estimators': 418, 'max_depth': 7, 'learning_rate': 0.09664690348612619, 'subsample': 0.7863875639249094, 'colsample_bytree': 0.8148231301744091}. Best is trial 13 with value: 211702.05751462237.


🏃 View run peaceful-sloth-213 at: http://localhost:8080/#/experiments/1/runs/99f56fa20cc64891ae8a15976ec9d325
🧪 View experiment at: http://localhost:8080/#/experiments/1


[I 2025-08-03 18:35:52,560] Trial 22 finished with value: 214690.47118333506 and parameters: {'n_estimators': 382, 'max_depth': 6, 'learning_rate': 0.0979326561269741, 'subsample': 0.7996561195704086, 'colsample_bytree': 0.8573951714597065}. Best is trial 13 with value: 211702.05751462237.


🏃 View run indecisive-frog-386 at: http://localhost:8080/#/experiments/1/runs/6263eeaf402d40b5af4c5791312cc5eb
🧪 View experiment at: http://localhost:8080/#/experiments/1


[I 2025-08-03 18:36:03,661] Trial 23 finished with value: 214647.4514555396 and parameters: {'n_estimators': 434, 'max_depth': 8, 'learning_rate': 0.0807987275528382, 'subsample': 0.6612434968935158, 'colsample_bytree': 0.8129704910436031}. Best is trial 13 with value: 211702.05751462237.


🏃 View run rambunctious-squid-0 at: http://localhost:8080/#/experiments/1/runs/44ab989b80014156a149e3b2f46323f7
🧪 View experiment at: http://localhost:8080/#/experiments/1


[I 2025-08-03 18:36:13,237] Trial 24 finished with value: 221209.3287494662 and parameters: {'n_estimators': 400, 'max_depth': 5, 'learning_rate': 0.05853096623320251, 'subsample': 0.741338672294202, 'colsample_bytree': 0.7169480275991702}. Best is trial 13 with value: 211702.05751462237.


🏃 View run gifted-jay-159 at: http://localhost:8080/#/experiments/1/runs/b79b68011a82489dbb9a3b48b4ac3a92
🧪 View experiment at: http://localhost:8080/#/experiments/1


[I 2025-08-03 18:36:24,117] Trial 25 finished with value: 214986.33694057039 and parameters: {'n_estimators': 341, 'max_depth': 7, 'learning_rate': 0.09219573665590014, 'subsample': 0.673712251396336, 'colsample_bytree': 0.7750545449295776}. Best is trial 13 with value: 211702.05751462237.


🏃 View run traveling-boar-51 at: http://localhost:8080/#/experiments/1/runs/ae88104487f8464282eb49552ccc6b79
🧪 View experiment at: http://localhost:8080/#/experiments/1


[I 2025-08-03 18:36:36,604] Trial 26 finished with value: 215764.39829381387 and parameters: {'n_estimators': 469, 'max_depth': 9, 'learning_rate': 0.05406970911011599, 'subsample': 0.7943762692636813, 'colsample_bytree': 0.9317543532814608}. Best is trial 13 with value: 211702.05751462237.


🏃 View run clumsy-goose-304 at: http://localhost:8080/#/experiments/1/runs/b01207cfde224a5f833638196fa3edfb
🧪 View experiment at: http://localhost:8080/#/experiments/1


[I 2025-08-03 18:36:47,316] Trial 27 finished with value: 214854.693591459 and parameters: {'n_estimators': 312, 'max_depth': 8, 'learning_rate': 0.0680860901495601, 'subsample': 0.995438827954584, 'colsample_bytree': 0.8320643733791243}. Best is trial 13 with value: 211702.05751462237.


🏃 View run abrasive-owl-772 at: http://localhost:8080/#/experiments/1/runs/021824103a23402a88384e82f5d79ee2
🧪 View experiment at: http://localhost:8080/#/experiments/1


[I 2025-08-03 18:36:56,521] Trial 28 finished with value: 219099.2249553735 and parameters: {'n_estimators': 367, 'max_depth': 5, 'learning_rate': 0.07975423823507066, 'subsample': 0.8442977239823968, 'colsample_bytree': 0.8718252949464982}. Best is trial 13 with value: 211702.05751462237.


🏃 View run salty-lynx-280 at: http://localhost:8080/#/experiments/1/runs/59259051e1724d549e76cf87ecb5ece0
🧪 View experiment at: http://localhost:8080/#/experiments/1


[I 2025-08-03 18:37:05,989] Trial 29 finished with value: 228838.57535910988 and parameters: {'n_estimators': 267, 'max_depth': 7, 'learning_rate': 0.022796193111749834, 'subsample': 0.9088501753092252, 'colsample_bytree': 0.8007291110475588}. Best is trial 13 with value: 211702.05751462237.


🏃 View run agreeable-moth-585 at: http://localhost:8080/#/experiments/1/runs/f0229d5d99294cebb230403ec138bae0
🧪 View experiment at: http://localhost:8080/#/experiments/1


[I 2025-08-03 18:37:15,362] Trial 30 finished with value: 229559.5588907954 and parameters: {'n_estimators': 367, 'max_depth': 5, 'learning_rate': 0.03879563527214969, 'subsample': 0.7395671756740595, 'colsample_bytree': 0.6616057922528333}. Best is trial 13 with value: 211702.05751462237.


🏃 View run youthful-deer-797 at: http://localhost:8080/#/experiments/1/runs/93857d2b35f642b1952c643b691b1475
🧪 View experiment at: http://localhost:8080/#/experiments/1


[I 2025-08-03 18:37:25,590] Trial 31 finished with value: 212001.00346945706 and parameters: {'n_estimators': 431, 'max_depth': 7, 'learning_rate': 0.09846038789703203, 'subsample': 0.7913667063893213, 'colsample_bytree': 0.7628704853766913}. Best is trial 13 with value: 211702.05751462237.


🏃 View run zealous-eel-469 at: http://localhost:8080/#/experiments/1/runs/0b544cea759c481684a0b462d146bfce
🧪 View experiment at: http://localhost:8080/#/experiments/1


[I 2025-08-03 18:37:35,534] Trial 32 finished with value: 213942.6862195648 and parameters: {'n_estimators': 440, 'max_depth': 6, 'learning_rate': 0.09042690417829935, 'subsample': 0.815071480239104, 'colsample_bytree': 0.7465325742531757}. Best is trial 13 with value: 211702.05751462237.


🏃 View run rebellious-wolf-198 at: http://localhost:8080/#/experiments/1/runs/b9635ae31dde467b9152ac0818d65d3e
🧪 View experiment at: http://localhost:8080/#/experiments/1


[I 2025-08-03 18:37:46,243] Trial 33 finished with value: 213008.04916911814 and parameters: {'n_estimators': 411, 'max_depth': 8, 'learning_rate': 0.09780156262508885, 'subsample': 0.7790669263755998, 'colsample_bytree': 0.7650074700749473}. Best is trial 13 with value: 211702.05751462237.


🏃 View run gregarious-lynx-322 at: http://localhost:8080/#/experiments/1/runs/3abd73d5e51a41acb7b5e194d91732bf
🧪 View experiment at: http://localhost:8080/#/experiments/1


[I 2025-08-03 18:37:56,659] Trial 34 finished with value: 210947.30719116554 and parameters: {'n_estimators': 483, 'max_depth': 7, 'learning_rate': 0.08870041771552914, 'subsample': 0.8614979861853108, 'colsample_bytree': 0.7874546287981168}. Best is trial 34 with value: 210947.30719116554.


🏃 View run carefree-boar-304 at: http://localhost:8080/#/experiments/1/runs/f45a0956ac884bd78e15ddab01eccee4
🧪 View experiment at: http://localhost:8080/#/experiments/1


[I 2025-08-03 18:38:07,003] Trial 35 finished with value: 210017.80703459049 and parameters: {'n_estimators': 498, 'max_depth': 7, 'learning_rate': 0.08985240554148372, 'subsample': 0.8748997258518308, 'colsample_bytree': 0.7930861524502423}. Best is trial 35 with value: 210017.80703459049.


🏃 View run intrigued-swan-148 at: http://localhost:8080/#/experiments/1/runs/80bae48c76794d26b822afc9878f423b
🧪 View experiment at: http://localhost:8080/#/experiments/1


[I 2025-08-03 18:38:18,256] Trial 36 finished with value: 212254.58923310591 and parameters: {'n_estimators': 495, 'max_depth': 6, 'learning_rate': 0.08821496282046619, 'subsample': 0.8768630872626954, 'colsample_bytree': 0.7074714463484891}. Best is trial 35 with value: 210017.80703459049.


🏃 View run merciful-duck-977 at: http://localhost:8080/#/experiments/1/runs/dc7fa7fe5c4b4ec48d1b098b40ec4651
🧪 View experiment at: http://localhost:8080/#/experiments/1


[I 2025-08-03 18:38:31,662] Trial 37 finished with value: 210035.84029857063 and parameters: {'n_estimators': 477, 'max_depth': 7, 'learning_rate': 0.07973761134972115, 'subsample': 0.8693105742122865, 'colsample_bytree': 0.7903083204561049}. Best is trial 35 with value: 210017.80703459049.


🏃 View run rogue-bug-996 at: http://localhost:8080/#/experiments/1/runs/6497780b38604731852d1fe82c9c5c0f
🧪 View experiment at: http://localhost:8080/#/experiments/1


[I 2025-08-03 18:38:48,751] Trial 38 finished with value: 217102.15979893293 and parameters: {'n_estimators': 480, 'max_depth': 10, 'learning_rate': 0.07933965224343864, 'subsample': 0.8749196294155361, 'colsample_bytree': 0.7846671848990078}. Best is trial 35 with value: 210017.80703459049.


🏃 View run learned-swan-996 at: http://localhost:8080/#/experiments/1/runs/11492101fde94e0fbef9d34e4bf57bb2
🧪 View experiment at: http://localhost:8080/#/experiments/1


[I 2025-08-03 18:38:59,050] Trial 39 finished with value: 216723.20924232804 and parameters: {'n_estimators': 500, 'max_depth': 5, 'learning_rate': 0.07576297261798516, 'subsample': 0.9053236649898437, 'colsample_bytree': 0.8665829635571787}. Best is trial 35 with value: 210017.80703459049.


🏃 View run aged-fox-383 at: http://localhost:8080/#/experiments/1/runs/c99afdb5ff91454bb8e9b1c98218a06e
🧪 View experiment at: http://localhost:8080/#/experiments/1


[I 2025-08-03 18:39:09,117] Trial 40 finished with value: 213571.43861993638 and parameters: {'n_estimators': 470, 'max_depth': 6, 'learning_rate': 0.0836177505829586, 'subsample': 0.8503171628122532, 'colsample_bytree': 0.7870783686954531}. Best is trial 35 with value: 210017.80703459049.


🏃 View run masked-worm-4 at: http://localhost:8080/#/experiments/1/runs/8ebd9183002a48eb848e145f9b8d5099
🧪 View experiment at: http://localhost:8080/#/experiments/1


[I 2025-08-03 18:39:19,471] Trial 41 finished with value: 211906.56770838902 and parameters: {'n_estimators': 450, 'max_depth': 7, 'learning_rate': 0.09217839228896142, 'subsample': 0.8213762741045634, 'colsample_bytree': 0.8337093469993946}. Best is trial 35 with value: 210017.80703459049.


🏃 View run monumental-fish-414 at: http://localhost:8080/#/experiments/1/runs/69c9f413025641749d3a611d3cac92cf
🧪 View experiment at: http://localhost:8080/#/experiments/1


[I 2025-08-03 18:39:31,514] Trial 42 finished with value: 209738.88294286685 and parameters: {'n_estimators': 454, 'max_depth': 7, 'learning_rate': 0.09199313737006379, 'subsample': 0.820854430196673, 'colsample_bytree': 0.8310858328214962}. Best is trial 42 with value: 209738.88294286685.


🏃 View run luminous-dove-820 at: http://localhost:8080/#/experiments/1/runs/3010df279f934cb584a15c4ffa0d1295
🧪 View experiment at: http://localhost:8080/#/experiments/1


[I 2025-08-03 18:39:43,374] Trial 43 finished with value: 213341.71698151357 and parameters: {'n_estimators': 478, 'max_depth': 8, 'learning_rate': 0.0864643435533038, 'subsample': 0.8867797401304875, 'colsample_bytree': 0.8883035645271904}. Best is trial 42 with value: 209738.88294286685.


🏃 View run selective-conch-375 at: http://localhost:8080/#/experiments/1/runs/396730eab1da42c1af33452bc2b1bd89
🧪 View experiment at: http://localhost:8080/#/experiments/1


[I 2025-08-03 18:39:54,209] Trial 44 finished with value: 209455.421559702 and parameters: {'n_estimators': 498, 'max_depth': 7, 'learning_rate': 0.07264841326473245, 'subsample': 0.8614269654059298, 'colsample_bytree': 0.8370732444766079}. Best is trial 44 with value: 209455.421559702.


🏃 View run adorable-tern-776 at: http://localhost:8080/#/experiments/1/runs/ed220bc52d964f369fe0b8ed9bf39ad8
🧪 View experiment at: http://localhost:8080/#/experiments/1


[I 2025-08-03 18:40:07,113] Trial 45 finished with value: 210614.59736309733 and parameters: {'n_estimators': 498, 'max_depth': 7, 'learning_rate': 0.07218331765413692, 'subsample': 0.9336021844985529, 'colsample_bytree': 0.8363077808637495}. Best is trial 44 with value: 209455.421559702.


🏃 View run kindly-auk-285 at: http://localhost:8080/#/experiments/1/runs/53458a5a13df4faab8a8fced02354615
🧪 View experiment at: http://localhost:8080/#/experiments/1


[I 2025-08-03 18:40:17,336] Trial 46 finished with value: 213375.19005004497 and parameters: {'n_estimators': 500, 'max_depth': 6, 'learning_rate': 0.07232702682029381, 'subsample': 0.939328051010658, 'colsample_bytree': 0.9135038269086495}. Best is trial 44 with value: 209455.421559702.


🏃 View run incongruous-bass-783 at: http://localhost:8080/#/experiments/1/runs/d4266a1ca2304689bbe5b559190b5a71
🧪 View experiment at: http://localhost:8080/#/experiments/1


[I 2025-08-03 18:40:27,345] Trial 47 finished with value: 211631.0826408949 and parameters: {'n_estimators': 464, 'max_depth': 6, 'learning_rate': 0.07464712184602851, 'subsample': 0.8933890295359296, 'colsample_bytree': 0.8384579575416038}. Best is trial 44 with value: 209455.421559702.


🏃 View run upbeat-smelt-949 at: http://localhost:8080/#/experiments/1/runs/abad27da010649d4b4a4c3dc57c1735a
🧪 View experiment at: http://localhost:8080/#/experiments/1


[I 2025-08-03 18:40:37,996] Trial 48 finished with value: 210566.6913058495 and parameters: {'n_estimators': 448, 'max_depth': 7, 'learning_rate': 0.08000284210829708, 'subsample': 0.9293337536111983, 'colsample_bytree': 0.8897099494886389}. Best is trial 44 with value: 209455.421559702.


🏃 View run legendary-conch-790 at: http://localhost:8080/#/experiments/1/runs/359cdfecf2a74bafbc99e92596884317
🧪 View experiment at: http://localhost:8080/#/experiments/1


[I 2025-08-03 18:40:48,394] Trial 49 finished with value: 210308.4480670425 and parameters: {'n_estimators': 447, 'max_depth': 7, 'learning_rate': 0.0818134126481448, 'subsample': 0.9818661378285093, 'colsample_bytree': 0.8840448692625059}. Best is trial 44 with value: 209455.421559702.


🏃 View run receptive-ape-416 at: http://localhost:8080/#/experiments/1/runs/d99d896c9f484976b4b6d56b3543c152
🧪 View experiment at: http://localhost:8080/#/experiments/1
🏃 View run optuna_hpo at: http://localhost:8080/#/experiments/1/runs/141e8bd8174e42e1a81e43ab3be9ba56
🧪 View experiment at: http://localhost:8080/#/experiments/1


In [30]:
def model_regisry(experiment_id="1", rmse_threshold=230000,
                  model_name="Perth housing price prediction"):
    """
    Register the best tuning run's model and move it to the Production stage.

    Searches the active runs of ``experiment_id`` whose ``rmse`` metric is
    below ``rmse_threshold`` (at most five, best first), prints them,
    registers the model logged at ``runs:/<run_id>/model`` for the best run
    under ``model_name``, and transitions the newly created version to the
    "Production" stage.

    Parameters
    ----------
    experiment_id : str, optional
        MLflow experiment to search. Defaults to ``"1"``.
    rmse_threshold : int or float, optional
        Only runs with ``metrics.rmse`` strictly below this value are
        considered. Defaults to ``230000``.
    model_name : str, optional
        Registered-model name to create a new version under.

    Returns
    -------
    tuple
        ``(run_id, model_version, model_name)`` where ``model_version`` is the
        version created by this call, as a string.

    Raises
    ------
    IndexError
        If no run satisfies the RMSE filter, so there is nothing to register.
    """
    from mlflow.entities import ViewType

    client = MlflowClient(tracking_uri="http://localhost:8080/")

    runs = client.search_runs(
        experiment_ids=[experiment_id],
        filter_string=f"metrics.rmse < {rmse_threshold}",
        run_view_type=ViewType.ACTIVE_ONLY,
        max_results=5,
        order_by=["metrics.rmse ASC"],
    )

    if not runs:
        # IndexError is what indexing the empty result raised before; the
        # message now says what actually went wrong.
        raise IndexError(
            f"No active run in experiment {experiment_id!r} has "
            f"metrics.rmse < {rmse_threshold}; nothing to register."
        )

    for run in runs:
        # Single quotes inside the f-string keep this valid before Python 3.12.
        print(f"runID: {run.info.run_id}, name: {run.info.run_name}, rmse: {run.data.metrics['rmse']}")

    # Results are ordered by ascending RMSE, so the first run is the best one.
    run_id = runs[0].info.run_id
    print(f"Fisrt model in the list: {run_id}")
    model_uri = f"runs:/{run_id}/model"

    # Tags attached to the new registered model version.
    tags = {
        "model_type": "xgbRegression",
        "owner": "wing",
        "data_version": "v1.0",
    }

    # register_model returns the version it just created. Use that directly
    # instead of scanning every registered model's latest_versions, which can
    # pick up a different version or leave the result undefined.
    registered = mlflow.register_model(model_uri=model_uri, name=model_name, tags=tags)
    model_version = str(registered.version)

    # NOTE(review): the recorded cell output shows a warning pointing at this
    # call; model stages appear to be deprecated in recent MLflow releases.
    # Kept as-is so the version still ends up in "Production".
    client.transition_model_version_stage(
        name=model_name,
        version=model_version,
        stage="Production",
    )
    print(f"{model_name} - {model_version} : moved to production")

    return run_id, model_version, model_name


In [37]:
def export_model(model_name, experiement_id,
                 models_dir="../docker/mlflow-prefect/mlflow/mlartifacts/1/models",
                 destination="../deploy/model/"):
    """
    Copy a registered model version's artifact folder into the deployment directory.

    Parameters
    ----------
    model_name : str
        Name of the registered model.
    experiement_id : str
        Despite its name, this value is passed to
        ``MlflowClient.get_model_version`` as the model *version*. The
        parameter name is kept so existing calls continue to work.
    models_dir : str, optional
        Local directory that holds the logged-model folders. The default
        points at the ``models`` folder of experiment ``1`` in the local
        artifact store.
    destination : str, optional
        Directory the model folder is copied into; existing files there are
        overwritten where names collide.

    Returns
    -------
    None
    """
    import shutil

    # Use the same explicit tracking server as model_regisry() rather than
    # relying on mlflow.set_tracking_uri() having been run in an earlier cell.
    client = MlflowClient(tracking_uri="http://localhost:8080/")
    model_version = client.get_model_version(model_name, experiement_id)

    # The recorded output shows sources of the form "models:/m-<id>";
    # stripping the scheme leaves the folder name used in the artifact store.
    model_folder_name = model_version.source.replace("models:/", "")

    # Copy the model folder into the deployment directory.
    source = f"{models_dir}/{model_folder_name}"
    shutil.copytree(src=source, dst=destination, dirs_exist_ok=True)



In [38]:
# Export version "1" of the registered model to the deployment folder.
# NOTE: this needs the model to be registered already. model_regisry() is
# called in a later cell of this notebook, so on a top-to-bottom run that cell
# has to be executed before this one.
export_model("Perth housing price prediction", "1")

In [31]:
# Register the best run's model and promote it to Production; the returned
# (run_id, model_version, model_name) tuple is shown as the cell output.
model_regisry()

runID: ed220bc52d964f369fe0b8ed9bf39ad8, name: adorable-tern-776, rmse: 209455.421559702
runID: 3010df279f934cb584a15c4ffa0d1295, name: luminous-dove-820, rmse: 209738.88294286685
runID: 80bae48c76794d26b822afc9878f423b, name: intrigued-swan-148, rmse: 210017.80703459049
runID: 6497780b38604731852d1fe82c9c5c0f, name: rogue-bug-996, rmse: 210035.84029857063
runID: d99d896c9f484976b4b6d56b3543c152, name: receptive-ape-416, rmse: 210308.4480670425
Fisrt model in the list: ed220bc52d964f369fe0b8ed9bf39ad8


Successfully registered model 'Perth housing price prediction'.
2025/08/03 18:42:04 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: Perth housing price prediction, version 1
Created version '1' of model 'Perth housing price prediction'.
  client.transition_model_version_stage(


Perth housing price prediction - 1 : moved to production


('ed220bc52d964f369fe0b8ed9bf39ad8', '1', 'Perth housing price prediction')

In [32]:
# Look up version "1" of the registered model and show its artifact source URI.
client = MlflowClient()
model_version = client.get_model_version(
    name="Perth housing price prediction",
    version="1",
)

print("Source path:", model_version.source)

Source path: models:/m-f70ce8e745ad4ef0a4a37542b79a6cf5


In [17]:
# Strip the "models:/" scheme from the source URI to get the bare model folder
# name - the same transformation export_model() applies before copying.
model_version.source.replace("models:/", "")

'm-f2b3160bebf74881aac843e6d3e2d679'