# Логирование экземпляра модели в MLflow

**Инициализация модели**

In [1]:
from torchinfo import summary
from omegaconf import OmegaConf
from speechgpt.models import AsrLlmCascadeModel

# Build the cascade configuration in one step: one checkpoint name per sub-model.
args = OmegaConf.create(
    {
        "llm_config": "Qwen/Qwen2-0.5B",
        "asr_config": "openai/whisper-large-v3-turbo",
    }
)

# Instantiate the ASR + LLM cascade; the trailing expression displays its module tree.
cascade = AsrLlmCascadeModel.build_model(args)
cascade

2024-12-31 14:27:11 | INFO | speechgpt_logger | Building ASR-LLM Cascade Model
2024-12-31 14:27:11 | INFO | speechgpt_logger | Loading models: ASR and LLM
2024-12-31 14:27:15 | INFO | speechgpt_logger | Model loaded from openai/whisper-large-v3-turbo
2024-12-31 14:27:15 | INFO | speechgpt_logger | ASR model loaded successfully
2024-12-31 14:27:15 | INFO | speechgpt_logger | Building HuggingFaceQwen2ForCausalLM model.
2024-12-31 14:27:15 | INFO | speechgpt_logger | Loading model from Qwen/Qwen2-0.5B
2024-12-31 14:27:15 | INFO | speechgpt_logger | Initializing Qwen2Decoder with 24 layers.
2024-12-31 14:27:20 | INFO | speechgpt_logger | Qwen2Decoder initialized successfully.
2024-12-31 14:27:22 | INFO | speechgpt_logger | Model initialized successfully.
2024-12-31 14:27:22 | INFO | speechgpt_logger | Loading model weights.
2024-12-31 14:27:24 | INFO | speechgpt_logger | Loaded model weights.
2024-12-31 14:27:24 | INFO | speechgpt_logger | LLM model loaded successfully


AsrLlmCascadeModel(
  (asr): HuggingFaceWhisperModel(
    (encoder): DummyEncoder()
    (decoder): DummyDecoder()
    (model): WhisperForConditionalGeneration(
      (model): WhisperModel(
        (encoder): WhisperEncoder(
          (conv1): Conv1d(128, 1280, kernel_size=(3,), stride=(1,), padding=(1,))
          (conv2): Conv1d(1280, 1280, kernel_size=(3,), stride=(2,), padding=(1,))
          (embed_positions): Embedding(1500, 1280)
          (layers): ModuleList(
            (0-31): 32 x WhisperEncoderLayer(
              (self_attn): WhisperSdpaAttention(
                (k_proj): Linear(in_features=1280, out_features=1280, bias=False)
                (v_proj): Linear(in_features=1280, out_features=1280, bias=True)
                (q_proj): Linear(in_features=1280, out_features=1280, bias=True)
                (out_proj): Linear(in_features=1280, out_features=1280, bias=True)
              )
              (self_attn_layer_norm): LayerNorm((1280,), eps=1e-05, elementwise_affine=Tru

In [2]:
# Display the torchinfo summary (layer tree with parameter counts) for the cascade.
summary(cascade)

Layer (type:depth-idx)                                            Param #
AsrLlmCascadeModel                                                --
├─HuggingFaceWhisperModel: 1-1                                    --
│    └─DummyEncoder: 2-1                                          --
│    └─DummyDecoder: 2-2                                          --
│    └─WhisperForConditionalGeneration: 2-3                       --
│    │    └─WhisperModel: 3-1                                     808,878,080
│    │    └─Linear: 3-2                                           66,388,480
├─HuggingFaceQwen2ForCausalLM: 1-2                                --
│    └─Qwen2Decoder: 2-4                                          --
│    │    └─Embedding: 3-3                                        136,134,656
│    │    └─ModuleList: 3-4                                       357,897,216
│    │    └─Qwen2RMSNorm: 3-5                                     896
│    │    └─Qwen2RotaryEmbedding: 3-6                         

**Синтетическая кривая потерь**

In [3]:
import numpy as np
import matplotlib.pyplot as plt


def plot_loss(loss):
    """Plot a training-loss curve and highlight its minimum.

    Args:
        loss: Indexable sequence of per-epoch loss values. Epochs are
            numbered starting from 1 on the x axis.

    Returns:
        The matplotlib figure holding the plot. The figure is passed to
        ``plt.close`` before being returned (presumably so the notebook does
        not render it on its own -- confirm if this matters to you).
    """
    n_points = len(loss)
    epoch_axis = np.arange(1, n_points + 1)

    # Position of the smallest loss value; epochs are 1-based, indices 0-based.
    min_idx = np.argmin(loss)
    best_epoch = min_idx + 1
    best_train_loss = loss[min_idx]
    legend_text = f"Best epoch: {best_epoch}\nBest loss: {best_train_loss:.4f}"

    fig, ax = plt.subplots(figsize=(7, 4))
    ax.plot(epoch_axis, loss)
    # Vertical guide line under the marker (lower zorder) at the best epoch.
    ax.axvline(best_epoch, c="black", zorder=1)
    ax.scatter(
        best_epoch,
        best_train_loss,
        c="lime",
        marker="x",
        zorder=2,
        label=legend_text,
    )
    ax.legend()
    ax.set(xlabel="Epoch", ylabel="Train loss", title="Train loss dynamics plot")

    plt.close(fig)
    return fig

**Загрузка на локально запущенный mlflow-сервер**

Для этого при локальном запуске указываем в `.env` переменную `MLFLOW_TRACKING_URI="http://localhost:5000"`. Работа с удалённым сервером пока не предусмотрена.

**Создаем эксперимент**

In [4]:
import mlflow
from dotenv import load_dotenv

# Load settings such as MLFLOW_TRACKING_URI from the local .env file.
load_dotenv()

# set_experiment activates the experiment, creating it first when it does not
# exist yet, and returns the Experiment entity. No separate create_experiment
# call is needed (that function returns the id as a plain string, not an
# object with `.experiment_id`), and no exception has to be swallowed, so
# genuine failures such as an unreachable tracking server surface immediately.
exp_id = mlflow.set_experiment("speechgpt_cascade").experiment_id

exp_id

'568664667297304622'

**Создаем первый run**

In [5]:
run_name = "cascade_run_1"
run_description = """SpeechGPT first cascade iteration"""

with mlflow.start_run(experiment_id=exp_id, run_name=run_name, description=run_description):  # run_name can be changed as well
    epochs = np.arange(1, 101)

    # Synthetic loss: 1/sqrt(epoch) decay plus Gaussian noise. A seeded
    # generator keeps the curve and the logged metrics reproducible on re-run.
    rng = np.random.default_rng(seed=1)
    loss = 2 / np.sqrt(epochs) + rng.normal(scale=0.03, size=len(epochs))

    # Epochs are 1-based while array indices are 0-based.
    best_epoch = int(np.argmin(loss)) + 1
    best_train_loss = loss[best_epoch - 1]

    fig1 = plot_loss(loss)

    # mlflow.pytorch.log_model(cascade, "cascade")
    mlflow.log_param("epochs", len(epochs))
    mlflow.log_param("learning rate", 1e-4)
    mlflow.log_metrics({
        "Best train loss": best_train_loss,
        "Best train epoch": best_epoch,
    })

    mlflow.log_figure(fig1, "train_loss.png")
    # The torchinfo summary contains box-drawing characters, so write it as
    # UTF-8 explicitly instead of relying on the platform default encoding.
    with open("model_summary.txt", "w", encoding="utf-8") as f:
        f.write(str(summary(cascade)))
    mlflow.log_artifact("model_summary.txt")

🏃 View run cascade_run_1 at: http://localhost:5000/#/experiments/568664667297304622/runs/123bb073b3ab4141ae1ca44f94f05a83
🧪 View experiment at: http://localhost:5000/#/experiments/568664667297304622


**Создаем второй run**

In [6]:
run_name = "cascade_run_2"
run_description = """SpeechGPT second cascade iteration"""

with mlflow.start_run(experiment_id=exp_id, run_name=run_name, description=run_description):  # run_name can be changed as well
    epochs = np.arange(1, 101)

    # Synthetic loss: 1/sqrt(epoch) decay plus Gaussian noise. A seeded
    # generator keeps the curve and the logged metrics reproducible on re-run.
    rng = np.random.default_rng(seed=2)
    loss = 2.5 / np.sqrt(epochs) + rng.normal(scale=0.05, size=len(epochs))

    # Epochs are 1-based while array indices are 0-based.
    best_epoch = int(np.argmin(loss)) + 1
    best_train_loss = loss[best_epoch - 1]

    fig2 = plot_loss(loss)

    # mlflow.pytorch.log_model(cascade, "cascade")
    mlflow.log_param("epochs", len(epochs))
    mlflow.log_param("learning rate", 1e-5)
    mlflow.log_metrics({
        "Best train loss": best_train_loss,
        "Best train epoch": best_epoch,
    })

    mlflow.log_figure(fig2, "train_loss.png")
    # The torchinfo summary contains box-drawing characters, so write it as
    # UTF-8 explicitly instead of relying on the platform default encoding.
    with open("model_summary.txt", "w", encoding="utf-8") as f:
        f.write(str(summary(cascade)))
    mlflow.log_artifact("model_summary.txt")

🏃 View run cascade_run_2 at: http://localhost:5000/#/experiments/568664667297304622/runs/62ce49212b334f9db4f18451b5f7eee8
🧪 View experiment at: http://localhost:5000/#/experiments/568664667297304622


In [7]:
# Query runs across all experiments and keep only the id and display-name columns.
run_columns = ["run_id", "tags.mlflow.runName"]
all_runs = mlflow.search_runs(search_all_experiments=True).loc[:, run_columns]
all_runs

Unnamed: 0,run_id,tags.mlflow.runName
0,62ce49212b334f9db4f18451b5f7eee8,cascade_run_2
1,123bb073b3ab4141ae1ca44f94f05a83,cascade_run_1


In [8]:
run_name = "cascade_run_2"

# Boolean mask over the rows whose run name matches; take the first match.
name_matches = all_runs["tags.mlflow.runName"] == run_name
run_id = all_runs.loc[name_matches, "run_id"].values[0]
run_id

'62ce49212b334f9db4f18451b5f7eee8'

In [9]:
# Fetch the stored run once, then pull out the pieces used in later cells.
run = mlflow.get_run(run_id)
run_data = run.data
metrics, params = run_data.metrics, run_data.params

# Download the run's artifacts; the returned path is listed in the next cell.
artifacts_path = mlflow.artifacts.download_artifacts(run_id=run_id)

Downloading artifacts:   0%|          | 0/2 [00:00<?, ?it/s]

In [10]:
import os

# os.listdir returns entries in arbitrary order; sort them so the displayed
# listing is the same on every run and platform.
sorted(os.listdir(artifacts_path))

['model_summary.txt', 'train_loss.png']

In [11]:
import pandas as pd

# One row per logged metric: column 0 holds the name, column 1 the value.
metric_rows = list(metrics.items())
pd.DataFrame(metric_rows)

Unnamed: 0,0,1
0,Best train epoch,91.0
1,Best train loss,0.141414


In [12]:
# Show the raw metrics mapping (metric name -> value) taken from run.data.metrics.
metrics

{'Best train epoch': 91.0, 'Best train loss': 0.1414139378686599}