In [None]:
import mlflow
import pandas as pd
import pytorch_lightning as pl
import torch
from mlflow.entities import ViewType
from mlflow.tracking.client import MlflowClient
from torch.utils.data import DataLoader

from datasets.cats_vs_dogs import CatsDogsDataset, get_preprocessor
from visualization.prediction_grid import prediction_grid

In [None]:
# Client bound to the currently configured MLflow tracking URI.
client = MlflowClient()
name = "Cats vs dogs classification"
experiment = client.get_experiment_by_name(name)

# get_experiment_by_name returns None (rather than raising) when no experiment
# has that name; fail here with a clear message instead of hitting an
# AttributeError on `experiment.experiment_id` in a later cell.
if experiment is None:
    raise ValueError(f"MLflow experiment {name!r} was not found on the tracking server")

In [None]:
# Display the tracking URI MLflow is currently configured with (cell output).
mlflow.tracking.get_tracking_uri()

In [None]:
# Fetch the experiments visible to the client and show them as the cell output.
# NOTE(review): `list_experiments` appears to have been dropped from newer MLflow
# releases in favour of `search_experiments` — confirm against the installed version.
experiments = client.list_experiments()
experiments

In [None]:
# Keep only runs logged with model == 'mobilenetv2' whose validation accuracy exceeds 0.5.
query = "params.model = 'mobilenetv2' and metrics.val_accuracy > 0.5"
runs = client.search_runs(
    # search_runs documents `experiment_ids` as a list of IDs, so wrap the
    # single ID explicitly instead of relying on a bare string being accepted.
    experiment_ids=[experiment.experiment_id],
    filter_string=query,
    run_view_type=ViewType.ACTIVE_ONLY,
)

In [None]:
# Number of runs returned by the search.
len(runs)

In [None]:
# Inspect the first returned run (raises IndexError if the search matched nothing).
runs[0]

In [None]:
# Validation accuracy metric stored on that first run.
runs[0].data.metrics["val_accuracy"]

Find the best run based on validation accuracy

In [None]:
# An empty search result would otherwise surface as an opaque IndexError;
# report the actual cause (no run satisfied the filter) instead.
if not runs:
    raise ValueError(f"no MLflow run matched the filter: {query}")

# Single pass with max() instead of sorting the whole list just to take the
# first element; on ties both approaches keep the earliest run in `runs`.
best_run = max(runs, key=lambda run: run.data.metrics["val_accuracy"])
print(best_run)

In [None]:
# Build the "runs:/<run_id>/model" URI that points at the model artifact
# logged under the best run, then load it back as a PyTorch module.
best_run_id = best_run.info.run_id
logged_model_uri = "runs:/{}/model".format(best_run_id)
print("logged model uri :", logged_model_uri)
loaded_model = mlflow.pytorch.load_model(logged_model_uri)

In [None]:
# Seed every RNG (including dataloader workers) so the evaluation is repeatable.
seed = 42
pl.seed_everything(seed=seed, workers=True)

# Test split: one row per image, with its file path and binary label.
df_test = pd.read_csv("data_splits/test.csv")

# MLflow returns every logged param as a string, so the flag comes back as
# "True"/"False". Passing it through unchanged would make "False" truthy;
# parse it into a real boolean first (the same reason `precision` is wrapped
# in int() when the Trainer is built).
# NOTE(review): assumes get_preprocessor expects a bool for `imagenet` — confirm.
use_imagenet = (
    str(best_run.data.params["use_imagenet_pretrained_weights"]).strip().lower() == "true"
)

dataset_test = CatsDogsDataset(
    df_test["img_fp"].values,
    df_test["is_dog"].values,
    preprocess=get_preprocessor(imagenet=use_imagenet),
)
dataloader_test = DataLoader(
    dataset_test,
    batch_size=16,
    num_workers=8,
)

In [None]:
# Report how many samples the test dataset exposes.
print("number of images in the test set:", len(dataset_test))

In [None]:
# Use a GPU when one is visible and fall back to CPU otherwise, matching the
# device selection done for the visualization cell; a hard-coded gpus=1 fails
# outright on CPU-only machines.
use_gpu = torch.cuda.is_available()
trainer = pl.Trainer(
    gpus=1 if use_gpu else 0,
    # Params are logged as strings, hence the int() cast. The training
    # precision is reused on GPU; on CPU fall back to full 32-bit precision.
    # NOTE(review): reduced-precision support on CPU depends on the installed
    # Lightning version — confirm the fallback is wanted.
    precision=int(best_run.data.params["precision"]) if use_gpu else 32,
    deterministic=True,
)
# Last expression of the cell: the returned test metrics are shown as output.
trainer.test(loaded_model, dataloaders=dataloader_test)

In [None]:
# Run the model's predict loop over the whole test dataloader and keep the
# per-batch outputs (not consumed by the cells visible below).
preds = trainer.predict(loaded_model, dataloaders=dataloader_test)

Visualize predictions on a small batch of the test set

In [None]:
# Choose the inference device: CUDA when present, CPU as the fallback.
device = "cpu"
if torch.cuda.is_available():
    device = "cuda"

# Put the model in evaluation mode and move it to the chosen device.
loaded_model.eval()
loaded_model.to(device)

# Shuffled loader over the same test dataset, yielding batches of 15 images
# for the prediction grid.
dataloader_test_viz = DataLoader(dataset_test, batch_size=15, shuffle=True, num_workers=8)

# Keep the iterator so the next cell can be re-run to pull a fresh batch.
batch_iter = iter(dataloader_test_viz)

In [None]:
# Pull the next shuffled batch: image tensors, integer labels and file paths.
inputs, labels, batch_fps = next(batch_iter)

# Inference only: no_grad skips building an autograd graph that would be
# thrown away immediately (the original detached it afterwards).
with torch.no_grad():
    logits = loaded_model(inputs.to(device))

# flatten() rather than squeeze(): squeeze() collapses a one-image batch to a
# 0-d tensor, which cannot be iterated below. flatten() always yields 1-D.
# NOTE(review): assumes the model emits one logit per image — confirm.
probas = torch.sigmoid(logits).flatten().cpu()
pred_is_dog = probas > 0.5
accuracy = (pred_is_dog == labels).float().mean().item()
print("accuracy on test set batch", accuracy)

# Map 0/1 labels and thresholded predictions to readable class names.
classes = ["cat", "dog"]
labels_str = [classes[int(int_label)] for int_label in labels]
pred_labels_str = [classes[int(is_dog)] for is_dog in pred_is_dog]
_ = prediction_grid(batch_fps, probas, pred_labels=pred_labels_str, labels=labels_str, ncols=5)