In [9]:
import mlflow
import numpy as np
import pandas as pd

import joblib
import os
from dotenv import load_dotenv

from collections import defaultdict

import json

In [10]:
from mlflow.models import Model
from mlflow.models.signature import infer_signature

Open questions:
- Can we run inference on data with no trial/paid? No — such data must be filtered out first.

### 0. Setup MLFlow

In [11]:
# Load variables from a local .env file into the process environment so the
# os.environ lookups in the following cells can see them.
load_dotenv()

True

In [None]:
# Address of the MLflow tracking server, read from the environment.
MLFLOW_URI = os.environ.get("MLFLOW_URI")

# Fail fast with an actionable message. Without a tracking server the
# "mlflow-artifacts:/..." URIs used later in this notebook cannot be resolved,
# and the problem would otherwise only surface as an opaque download error.
# MLFLOW_TRACKING_URI is MLflow's own environment variable; MLflow is expected
# to fall back to it when no URI is set explicitly, so it is accepted as well.
if not MLFLOW_URI and not os.environ.get("MLFLOW_TRACKING_URI"):
    raise RuntimeError(
        "MLFLOW_URI is not set. Define it in the environment or in the .env file "
        "before running this notebook."
    )
mlflow.set_tracking_uri(MLFLOW_URI)

### 1. Reading the data

In [None]:
# Location of the merged input CSV. Set the T2P_DATA_PATH environment variable
# (for example in the .env file) to run the notebook on another machine; the
# default below is the path that was originally hard-coded here.
DATA_PATH = os.environ.get(
    "T2P_DATA_PATH",
    "/Users/emulie/Documents/poc/T2PArima/data/merged_20250804.csv",
)
df = pd.read_csv(DATA_PATH)

In [None]:
# Keep only the first N rows of the loaded frame; every later cell works on
# this subset (dff) rather than on the full df.
N = 1000
dff = df.head(N)

### 2. Loading the model - t2p + labelencoder

#### Models

In [None]:
# GLM_ANDROID_MODEL_URI = "mlflow-artifacts:/5/f229eb7575fa4f3eabfcf87c060065ed/artifacts/glm_t2p"
# XGB_ANDROID_MODEL_URI = "mlflow-artifacts:/5/2c216ad97c47402887493cf2500fe44f/artifacts/xgb_t2p"
# META_ANDROID_MODEL_URI = "mlflow-artifacts:/5/522908f93fac450d81a9de1abd552336/artifacts/meta_rf_t2p"

# GLM_IOS_MODEL_URI = "mlflow-artifacts:/5/459fbd2a2f8b4552b68556c4eb43b257/artifacts/glm_t2p"
# XGB_IOS_MODEL_URI = "mlflow-artifacts:/5/86a78174c1f94c87a6e8366939c87fbe/artifacts/xgb_t2p"
# META_IOS_MODEL_URI = "mlflow-artifacts:/5/c15c62f08fc6479c9821b2ff1ecb36cc/artifacts/meta_rf_t2p"

# glm_android_model = mlflow.pyfunc.load_model(GLM_ANDROID_MODEL_URI)
# xgb_android_model = mlflow.pyfunc.load_model(XGB_ANDROID_MODEL_URI)
# meta_android_model = mlflow.pyfunc.load_model(META_ANDROID_MODEL_URI)

# glm_ios_model = mlflow.pyfunc.load_model(GLM_IOS_MODEL_URI)
# xgb_ios_model = mlflow.pyfunc.load_model(XGB_IOS_MODEL_URI)
# meta_ios_model = mlflow.pyfunc.load_model(META_IOS_MODEL_URI)

In [None]:
# Registry of the models to load. Keys follow the pattern <model>_<platform>;
# each value is a dict holding the MLflow artifact URI under the key "uri".
models_dct = {
    name: {"uri": uri}
    for name, uri in (
        ("glm_android", "mlflow-artifacts:/5/a1f2f2af04924ce5b021d588c55c2242/artifacts/glm_t2p"),
        ("xgb_android", "mlflow-artifacts:/5/2c216ad97c47402887493cf2500fe44f/artifacts/xgb_t2p"),
        ("meta_android", "mlflow-artifacts:/5/522908f93fac450d81a9de1abd552336/artifacts/meta_rf_t2p"),
        ("glm_ios", "mlflow-artifacts:/5/6515f8a031ab42998864a1d361ca6689/artifacts/glm_t2p"),
        ("xgb_ios", "mlflow-artifacts:/5/86a78174c1f94c87a6e8366939c87fbe/artifacts/xgb_t2p"),
        ("meta_ios", "mlflow-artifacts:/5/c15c62f08fc6479c9821b2ff1ecb36cc/artifacts/meta_rf_t2p"),
    )
}


In [None]:
# Load every model listed in models_dct and cache, per model key:
#   'model_path' -> the loaded pyfunc model object (despite the key name this is
#                   the model itself, not a filesystem path)
#   'metadata'   -> the ModelInfo returned by mlflow.models.get_model_info
#   'signature'  -> the signature as a dict whose 'inputs' entry is a JSON string
#   'X_cols'     -> input column names, in the order the signature declares them
models = defaultdict(dict)
for model_name, model_dct in models_dct.items():
    uri = model_dct['uri']
    model_metadata = mlflow.models.get_model_info(uri)

    # Go through the public ModelInfo.signature accessor instead of the private
    # _signature_dict attribute, and fail with a clear message when the model
    # was logged without a signature (its feature list cannot be derived then).
    signature = model_metadata.signature
    if signature is None:
        raise ValueError(
            f"Model '{model_name}' ({uri}) has no signature; cannot derive its input columns."
        )

    models[model_name]['model_path'] = mlflow.pyfunc.load_model(uri)
    models[model_name]['metadata'] = model_metadata
    models[model_name]['signature'] = signature.to_dict()
    models[model_name]['X_cols'] = [
        item['name'] for item in json.loads(models[model_name]['signature']['inputs'])
    ]

#### Get Model Signature from MLModel

#### Label Encoder Artifacts

In [None]:
# LABELENCODER_BASE_URI = "mlflow-artifacts:/5/7ce897e47d524068881351b0dc4d22fe/artifacts/labelencoder_country.pkl"
LABELENCODER_BASE_URI = "mlflow-artifacts:/5/7ce897e47d524068881351b0dc4d22fe/artifacts"

# Download one label-encoder artifact per categorical column and deserialize
# it; the result maps column name -> loaded encoder object.
encoders = {}
for col in ['network', 'country', 'platform']:
    # Artifact URIs always use '/', so build them with plain string formatting:
    # os.path.join would insert a backslash on Windows and yield an invalid URI.
    encoder_uri = f"{LABELENCODER_BASE_URI}/labelencoder_{col}.pkl"
    encoder_path = mlflow.artifacts.download_artifacts(encoder_uri)
    # NOTE(review): joblib.load unpickles the file and can therefore execute
    # arbitrary code; only load encoders from a trusted tracking server.
    encoders[col] = joblib.load(encoder_path)

In [None]:
# --- OPTIONAL: filtering out unknown label (TODO)


In [None]:
# for col in ['network', 'country', 'platform']:
#     dff[f"{col}_encoded"] = encoders[col].transform(dff[col], handle_unknown='ignore')

In [None]:
# encoders['network'].classes_

### 3. Data Transformation - Encoding Categorical Variables

### 4. Making the Inference

In [None]:
# Split the sample into one frame per platform. Rows whose platform is neither
# 'android' nor 'ios' are selected by neither mask and appear in neither frame.
android_mask = dff['platform'].eq('android')
ios_mask = dff['platform'].eq('ios')

dff_android, dff_ios = dff.loc[android_mask], dff.loc[ios_mask]

#### ANDROID GLM Inference

In [None]:

# Input columns of the Android GLM, as declared by its MLflow signature.
X_cols = models['glm_android']['X_cols']
# Take an explicit copy: later cells overwrite and add columns on this frame,
# and writing into a selection of dff_android triggers pandas'
# SettingWithCopyWarning.
df_glm_android = dff_android[X_cols].copy()

In [None]:
# cost_usd is log-transformed before the frame is passed to the GLM.
# The log is always computed from the untransformed dff_android column (aligned
# on the shared row index), so re-running this cell does not apply the log a
# second time. Note that a cost of 0 becomes -inf and a negative cost NaN.
df_glm_android = df_glm_android.assign(cost_usd=np.log(dff_android['cost_usd']))

In [None]:
# Score the Android rows with the GLM. Only the columns declared in the model
# signature are passed to predict(), so re-running the cell never feeds the
# previously written 't2p_prediction' column back into the model.
df_glm_android = df_glm_android.assign(
    t2p_prediction=models['glm_android']['model_path'].predict(
        df_glm_android[models['glm_android']['X_cols']]
    )
)

In [None]:
# type(df_glm_android['t2p_prediction'].iloc[df_glm_android.shape[0]-1])

In [None]:
# Boolean Series marking the rows whose GLM prediction is +inf or -inf.
inf_mask = df_glm_android['t2p_prediction'].pipe(np.isinf)

In [None]:
# True when at least one GLM prediction is infinite.
bool(inf_mask.any())

#### ANDROID XGB Inference

In [None]:
# Input columns of the Android XGB model, as declared by its MLflow signature.
X_cols = models['xgb_android']['X_cols']
# Take an explicit copy: the prediction column is added to this frame later,
# and writing into a selection of dff_android triggers pandas'
# SettingWithCopyWarning.
df_xgb_android = dff_android[X_cols].copy()

In [None]:
# Score the Android rows with the XGB model. Only the columns declared in the
# model signature are passed to predict(), so re-running the cell never feeds
# the previously written 't2p_predictions' column back into the model.
# NOTE(review): the column name is plural here while the GLM cell writes
# 't2p_prediction' — confirm which spelling downstream code expects.
df_xgb_android = df_xgb_android.assign(
    t2p_predictions=models['xgb_android']['model_path'].predict(
        df_xgb_android[models['xgb_android']['X_cols']]
    )
)

#### ANDROID RF Inference

In [None]:
# NOTE(review): X_cols is reassigned to the meta model's columns only in the
# next cell, so when cells run top to bottom this still shows the XGB columns.
X_cols

In [None]:
# Input columns of the Android meta model, as declared by its MLflow signature.
X_cols = models['meta_android']['X_cols']
# Take an explicit copy: the prediction column is added to this frame later,
# and writing into a selection of dff_android triggers pandas'
# SettingWithCopyWarning.
df_rf_android = dff_android[X_cols].copy()

In [None]:
# Score the Android rows with the meta model.
# Fix: the model previously received df_xgb_android (the XGB feature frame plus
# its prediction column) instead of the frame built from its own signature.
# It now gets df_rf_android, restricted to the meta model's declared columns.
df_rf_android = df_rf_android.assign(
    t2p_predictions=models['meta_android']['model_path'].predict(
        df_rf_android[models['meta_android']['X_cols']]
    )
)

In [None]:
# Registry URI of the form "models:/<name>/<version>". The value "1" pins
# version 1 of the registered model; it is not an alias for the newest version
# (the literal "latest" is what selects that).
model_name, stage = "GLM_android_overall", "1"
model_uri = f"models:/{model_name}/{stage}"

In [None]:
# Load the registered model addressed by model_uri as a generic pyfunc model.
model_test = mlflow.pyfunc.load_model(model_uri)

In [12]:
# model_uri_test = "models:/GLM_android_overall/latest"
# Load the registered META_android_overall model through the "latest" registry
# URI (i.e. whichever version the registry currently reports as newest).
model_uri_test = "models:/META_android_overall/latest"
model2 = mlflow.pyfunc.load_model(model_uri_test)

Downloading artifacts: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 5/5 [00:00<00:00,  7.26it/s]


In [None]:
# t = mlflow.pyfunc.get_model_dependencies(model_uri_test)