# MLOps Phase 2 - Model Training

In [37]:
import mlflow
import random
# Point the MLflow client at the tracking server ("INSERT_URL" is a placeholder
# that must be replaced with the real server address before running).
mlflow.set_tracking_uri("INSERT_URL")

In [38]:
from mlflow.models import infer_signature
import pandas as pd
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

In [56]:
# Read the parquet file and replace every missing value with 0 in a single
# expression, so `df` is only ever bound to the filled frame.
parquet_file_path = 'chess_data_2013/processed_chess_data_2013.parquet'
df = pd.read_parquet(parquet_file_path).fillna(0)

In [57]:
# Due to computational limitations, work on a random sample of 100,000 rows
# (capped at the dataset size) instead of the full dataset.
# NOTE: `random.seed` only seeds Python's built-in RNG. `DataFrame.sample` draws
# from NumPy's generator, so the seed has to be passed via `random_state` for the
# sample to be identical on every run.
random.seed(10)
sampled_df = df.sample(n=min(100000, len(df)), replace=False, random_state=10)

In [62]:
# Target is the `result` column; the features are every remaining column
# except `time_control`.
y = sampled_df['result']
non_feature_columns = ['time_control', 'result']
X = sampled_df.drop(columns=non_feature_columns)

In [63]:
# Display the feature frame to inspect the columns that will be passed to the model.
X

Unnamed: 0,black_rating,white_rating
1753492,1415.0,1689.0
1360200,1523.0,1664.0
62262,2008.0,1823.0
454945,1398.0,1659.0
874989,1590.0,1570.0
...,...,...
981196,1904.0,1846.0
1503351,1500.0,1758.0
1942370,1400.0,1281.0
1430221,1625.0,1056.0


In [64]:
# Hold out 20% of the sampled rows for evaluation; the fixed seed keeps the
# split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Hyperparameters live in a dict so the exact same values can be passed to the
# estimator here and logged to MLflow afterwards.
params = dict(
    solver="lbfgs",
    max_iter=1000,
    multi_class="auto",
    random_state=42,
)

# `fit` returns the estimator itself, so construction and training are chained.
lr = LogisticRegression(**params).fit(X_train, y_train)

# Evaluate on the held-out split.
y_pred = lr.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)

In [71]:
# Point the MLflow client at the tracking server and select the experiment.
# Both strings are placeholders that must be replaced before running.
mlflow.set_tracking_uri(uri="INSERT_URL")

mlflow.set_experiment("INSERT_EXPERIMENT_NAME")

with mlflow.start_run():
    # Log the hyperparameters
    mlflow.log_params(params)

    # Log the evaluation metric (accuracy on the held-out test split)
    mlflow.log_metric("accuracy", accuracy)

    # Set a tag that we can use to remind ourselves what this run was for
    mlflow.set_tag("Phase 2 Model", "Chess Result Prediction")

    # Infer the model signature
    signature = infer_signature(X_train, lr.predict(X_train))

    # Log the model. Only a handful of rows are stored as the input example:
    # MLflow serializes the example alongside the model, so passing the whole
    # training frame would write every training row into the artifact.
    model_info = mlflow.sklearn.log_model(
        sk_model=lr,
        artifact_path="phase2_model",
        signature=signature,
        input_example=X_train.head(5),
        registered_model_name="tracking-quickstart",
    )


Successfully registered model 'tracking-quickstart'.
2024/04/14 12:37:08 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: tracking-quickstart, version 1
Created version '1' of model 'tracking-quickstart'.
