# Introduction

Tests MLflow tracking from a Kubeflow (KF) notebook.

In [1]:
import sys, os

In [2]:
# install the mlflow==2.9.2 sdk, since the backend is 2.9.2 from helm chart
# !{sys.executable} -m pip install --user --upgrade mlflow==2.9.2 python-dotenv==1.0.1

In [3]:
# Import TensorFlow and print its version so the cell output records which
# release this notebook was executed with.
import tensorflow as tf
print(tf.__version__)

2024-01-26 17:39:15.815910: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2024-01-26 17:39:15.815941: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.


2.9.3


## Using remote tracking environment variables
```python
os.environ["MLFLOW_TRACKING_URI"]="https://mymlflowhost.example.com"
os.environ["MLFLOW_TRACKING_USERNAME"]="user"
os.environ["MLFLOW_TRACKING_PASSWORD"]=""
```

* https://mlflow.org/docs/latest/tracking/tutorials/remote-server.html
* https://www.mlflow.org/docs/latest/auth/index.html#authenticating-to-mlflow

In [4]:
#%%writefile .mlflow_env
## environment variables for MLflow remote tracking
#MLFLOW_TRACKING_URI="https://mymlflowhost.example.com"
#MLFLOW_TRACKING_USERNAME="user"
#MLFLOW_TRACKING_PASSWORD=""

In [5]:
from dotenv import load_dotenv
import os

# Dotenv file that holds the MLFLOW_TRACKING_* settings; the commented-out
# %%writefile cell earlier in this notebook shows its expected contents.
mlflow_env_file=".mlflow_env"
# override=True lets values from the file replace variables that are already
# set in the process environment. The call's boolean return value is the cell
# output (per the python-dotenv docs: True when at least one variable was set).
load_dotenv(dotenv_path=mlflow_env_file, override=True)

True

In [6]:
def print_mlflow_env():
    """Print the MLflow tracking settings found in the process environment.

    Reads ``MLFLOW_TRACKING_URI``, ``MLFLOW_TRACKING_USERNAME`` and
    ``MLFLOW_TRACKING_PASSWORD`` from ``os.environ`` and prints them one per
    line under a ``mlflow env:`` header.

    The password is never echoed: notebook outputs are stored in the notebook
    file, so a fixed mask is printed when a password is present. A variable
    that is missing (or, for the password, missing or empty) is reported as
    ``<not set>`` instead of raising ``KeyError``.

    Returns:
        None. The summary is written to standard output.
    """
    not_set = "<not set>"
    tracking_uri = os.environ.get("MLFLOW_TRACKING_URI", not_set)
    username = os.environ.get("MLFLOW_TRACKING_USERNAME", not_set)
    # Only report whether a password exists, never its value.
    password_display = "********" if os.environ.get("MLFLOW_TRACKING_PASSWORD") else not_set
    print(f"mlflow env:\n{tracking_uri}\n{username}\n{password_display}\n")
    
# print_mlflow_env()

In [7]:
# Name of the MLflow experiment that the training cell below looks up,
# creates if missing, and logs its run under.
MLFLOW_EXPERIMENT_NAME="kubeflow_notebook"

In [8]:
"""Trains and evaluate a simple MLP
on the Reuters newswire topic classification task.
"""
import numpy as np
from tensorflow import keras
from tensorflow.keras.datasets import reuters
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Activation
from tensorflow.keras.preprocessing.text import Tokenizer

# The following import and function call are the only additions to code required
# to automatically log metrics and parameters to MLflow.
import mlflow
import time 

# Name the run after the local wall-clock time at which this cell executes,
# so individual runs are easy to tell apart in the MLflow UI.
run_name = time.strftime("%Y-%m-%d_%H-%M-%S")

# Experiment name as it will be shown in the remote MLflow server.
# (Without an explicit experiment, MLflow uses the default experiment, id 0.)
experiment_name = MLFLOW_EXPERIMENT_NAME

# Look the experiment up by its exact name instead of interpolating the name
# into a filter expression: a name containing a single quote would produce an
# invalid filter string. get_experiment_by_name returns None when no
# experiment with that name exists.
if mlflow.get_experiment_by_name(experiment_name) is None:
    experiment_id = mlflow.create_experiment(name=experiment_name)
    print(f"experiment with string id {experiment_id} is created.")

# Enable automatic logging for TensorFlow/Keras training, then make the
# experiment above the active one for subsequent runs.
mlflow.tensorflow.autolog()
mlflow.set_experiment(experiment_name=experiment_name)
# Hyperparameters.
max_words = 1000  # passed as num_words to the dataset loader and the tokenizer
batch_size = 32
epochs = 5

print("Loading data...")
(x_train, y_train), (x_test, y_test) = reuters.load_data(num_words=max_words, test_split=0.2)

print(len(x_train), "train sequences")
print(len(x_test), "test sequences")

# Labels are integer class ids, so the class count is the largest id plus one.
num_classes = np.max(y_train) + 1
print(num_classes, "classes")

print("Vectorizing sequence data...")
tokenizer = Tokenizer(num_words=max_words)
x_train = tokenizer.sequences_to_matrix(x_train, mode="binary")
x_test = tokenizer.sequences_to_matrix(x_test, mode="binary")
print("x_train shape:", x_train.shape)
print("x_test shape:", x_test.shape)

print("Convert class vector to binary class matrix (for use with categorical_crossentropy)")
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)
print("y_train shape:", y_train.shape)
print("y_test shape:", y_test.shape)

print("Building model...")
model = Sequential()
model.add(Dense(512, input_shape=(max_words,)))
model.add(Activation("relu"))
model.add(Dropout(0.5))
model.add(Dense(num_classes))
model.add(Activation("softmax"))

model.compile(loss="categorical_crossentropy", optimizer="adam", metrics=["accuracy"])

# Train and evaluate inside an explicitly managed run. The context manager
# names the run and ends it when the block exits, including on errors.
# Calling mlflow.set_tag() with no active run would instead start a run
# implicitly and leave it active in the kernel, so a later execution of this
# cell would keep logging into the same run.
with mlflow.start_run(run_name=run_name):
    history = model.fit(
        x_train, y_train, batch_size=batch_size, epochs=epochs, verbose=1, validation_split=0.1
    )
    score = model.evaluate(x_test, y_test, batch_size=batch_size, verbose=1)
    # Record the held-out results on the run explicitly so they are stored
    # next to the training metrics.
    mlflow.log_metrics({"test_loss": score[0], "test_accuracy": score[1]})

print("Test score:", score[0])
print("Test accuracy:", score[1])

# Log the model
# mlruns/0/run_id/artifacts/my_models/
# otherwise the autolog() is saving the model at
# mlruns/0/run_id/artifacts/model/
# for mlflow 2.5.0
# mlflow.tensorflow.log_model(model, artifact_path="my_models")

experiment with string id 2 is created.
Loading data...
8982 train sequences
2246 test sequences
46 classes
Vectorizing sequence data...
x_train shape: (8982, 1000)
x_test shape: (2246, 1000)
Convert class vector to binary class matrix (for use with categorical_crossentropy)
y_train shape: (8982, 46)
y_test shape: (2246, 46)
Building model...


2024-01-26 17:39:20.653416: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory
2024-01-26 17:39:20.653456: W tensorflow/stream_executor/cuda/cuda_driver.cc:269] failed call to cuInit: UNKNOWN ERROR (303)
2024-01-26 17:39:20.653484: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:163] no NVIDIA GPU device is present: /dev/nvidia0 does not exist
2024-01-26 17:39:20.653719: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
INFO:tensorflow:Assets written to: /tmp/tmps_or4n5h/model/data/model/assets




Test score: 0.8870267271995544
Test accuracy: 0.792965292930603
