# Experimentation ⚛ 🧪

In [None]:
# Install the pinned dependencies used by the experimentation cells below.
# The first line installs torch within the given version range; the next two
# pass -U so already-installed copies of those packages are upgraded as well.
! pip install --quiet "torch>=1.2.0,<2.0.0" 
! pip install --quiet -U matplotlib==3.1.3 pyyaml==5.4.1 "plotly>=5.9.0,<6.0.0" "seaborn>=0.11.2,<1.0.0"
! pip install --quiet -U "mlfoundry>=0.3.33,<0.4.0"

## Login and initialize MlFoundry client

In [None]:
import mlfoundry as mlf
# Client handle used by the cells below to create the experiment run.
client = mlf.get_client()

## Fashion MNIST Classification

In [None]:
import json
import random
from types import SimpleNamespace
from timeit import default_timer as timer

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
from sklearn.metrics import accuracy_score, f1_score, classification_report, confusion_matrix
from sklearn.metrics import ConfusionMatrixDisplay
from sklearn.model_selection import train_test_split
from tqdm.notebook import tqdm

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.optim.lr_scheduler import StepLR
from torch.utils.data import DataLoader, RandomSampler, Subset

## Start a run

In [None]:
# Create the run that every log_* call in this notebook writes to, then tag it
# so it can be told apart by framework and model type.
run = client.create_run(project_name="fashion-mnist-demo-trial", run_name="pytorch-cnn")
run.set_tags({"framework": "pytorch", "model-type": "cnn"})

## Load the dataset

In [None]:
# The only preprocessing step is converting each image to a tensor.
transform = transforms.Compose([transforms.ToTensor()])
# Only the train split passes download=True; the test split is read from the same root.
# NOTE(review): presumably the train download also fetches the test files — confirm.
train_dataset = datasets.FashionMNIST('../fashion-mnist-data', train=True, download=True, transform=transform)
test_dataset = datasets.FashionMNIST('../fashion-mnist-data', train=False, transform=transform)

In [None]:
# Copy only the compressed .gz archives into a separate gz/ folder; that folder
# is what gets logged as the dataset artifact in the next section.
! mkdir -p ../fashion-mnist-data/FashionMNIST/gz/
! cp ../fashion-mnist-data/FashionMNIST/raw/*.gz ../fashion-mnist-data/FashionMNIST/gz/

## Log the raw dataset as an artifact

In [None]:
# Log the folder of .gz archives as a run artifact under "fashion-mnist-data".
run.log_artifact("../fashion-mnist-data/FashionMNIST/gz/", artifact_path="fashion-mnist-data")

In [None]:
# Human-readable class names. The list position is the integer class index:
# the notebook indexes this list with dataset labels and with model predictions.
idx2label = [
  "T-shirt/Top",
  "Trouser",
  "Pullover",
  "Dress",
  "Coat", 
  "Sandal", 
  "Shirt",
  "Sneaker",
  "Bag",
  "Ankle Boot"
]

## Log the label map as a JSON file

In [None]:
# Write the label map to config.json and log it at the artifact root.
# The deployment scripts later download "config.json" and read its "idx2label" key.
with open("config.json", "w") as f:
    json.dump({"idx2label": idx2label}, f)
run.log_artifact("config.json", artifact_path="")

## A look at a sample of data

In [None]:
# Read the labels straight from each dataset's `targets` tensor instead of
# iterating the dataset, which would decode and transform every image just to
# throw the pixels away.
y_train = pd.DataFrame({"y": train_dataset.targets.numpy()})
y_test = pd.DataFrame({"y": test_dataset.targets.numpy()})
# Pick 3 examples per class with a fixed seed; the resulting index values are
# dataset positions and are reused later for plotting and image logging.
train_sample = y_train.groupby('y').sample(n=3, random_state=42).index
test_sample = y_test.groupby('y').sample(n=3, random_state=42).index

In [None]:
# Show the sampled training images in a grid, each titled with its class name.
n_cols = 10
# Ceiling division: just enough rows for the sample, so no blank rows are drawn.
n_rows = (len(train_sample) + n_cols - 1) // n_cols
fig = plt.figure(figsize=(30, 10))
for i, idx in enumerate(train_sample):
    image_t, label_idx = train_dataset[idx]
    ax = fig.add_subplot(n_rows, n_cols, i + 1, xticks=[], yticks=[])
    # Drop the singleton channel dimension so imshow receives a 2-D image.
    ax.imshow(np.squeeze(image_t), cmap='gray')
    ax.set_title(idx2label[label_idx])
# Lay out once after all axes exist rather than on every iteration.
fig.tight_layout()

## Define the CNN Model and training utils

In [None]:
class Net(nn.Module):
    """Small CNN classifier: two 3x3 convolutions, max-pooling, two linear layers.

    The first linear layer expects 9216 flattened features, which is
    64 channels * 12 * 12 and therefore assumes 1x28x28 inputs
    (28 -> 26 -> 24 after the two unpadded convolutions, then 12 after pooling).
    """

    def __init__(self, num_classes):
        """Create the layers; `num_classes` sets the width of the output layer."""
        super().__init__()
        # Layer creation order is kept fixed so seeded initialisation is reproducible.
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=32, kernel_size=3, stride=1)
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, stride=1)
        self.dropout1 = nn.Dropout(p=0.25)
        self.dropout2 = nn.Dropout(p=0.5)
        self.fc1 = nn.Linear(in_features=9216, out_features=128)
        self.fc2 = nn.Linear(in_features=128, out_features=num_classes)

    def forward(self, x):
        """Return per-class log-probabilities (log-softmax over dim 1) for batch `x`."""
        features = F.relu(self.conv1(x))
        features = F.relu(self.conv2(features))
        features = self.dropout1(F.max_pool2d(features, 2))
        # Flatten everything except the batch dimension before the dense layers.
        hidden = F.relu(self.fc1(torch.flatten(features, 1)))
        logits = self.fc2(self.dropout2(hidden))
        return F.log_softmax(logits, dim=1)


In [None]:
def set_random_seed(seed_value: int):
    """Seed Python's `random`, NumPy and PyTorch with `seed_value`.

    When CUDA is available, the CUDA generators are seeded too and cuDNN is
    switched to deterministic mode with benchmarking disabled.
    """
    random.seed(seed_value)
    np.random.seed(seed_value)
    torch.manual_seed(seed_value)
    if not torch.cuda.is_available():
        return
    torch.cuda.manual_seed(seed_value)
    torch.cuda.manual_seed_all(seed_value)
    cudnn = torch.backends.cudnn
    cudnn.deterministic = True
    cudnn.benchmark = False


def make_dataloader(dataset, batch_size, pin_memory=False, shuffle=False, num_workers=1):
    """Build a `DataLoader` over `dataset` that keeps the final partial batch.

    Args:
        dataset: Dataset to iterate over.
        batch_size: Number of samples per batch.
        pin_memory: Passed through to `DataLoader`.
        shuffle: Whether to reshuffle the data on every pass.
        num_workers: Number of loader worker processes. Defaults to 1, the
            value that used to be hard-coded; pass 0 to load in the main process.

    Returns:
        The configured `DataLoader` (`drop_last=False`).
    """
    return DataLoader(
        dataset=dataset,
        batch_size=batch_size,
        shuffle=shuffle,
        num_workers=num_workers,
        pin_memory=pin_memory,
        drop_last=False,
    )


def get_y(model, device, dataloader):
    """Run `model` over `dataloader` and collect the loss, targets and predictions.

    The model is switched to eval mode and left in it; callers that keep
    training must call `model.train()` again.

    Args:
        model: Module whose output is per-class log-probabilities (it is fed to
            `F.nll_loss` and arg-maxed over dim 1).
        device: Device each batch is moved to before the forward pass.
        dataloader: Iterable of `(batch_input, batch_target)` pairs.

    Returns:
        Tuple `(loss, y_true, y_pred)`: `loss` is the NLL loss SUMMED over all
        samples (not averaged); `y_true` and `y_pred` are 1-D NumPy arrays of
        target and predicted class indices in dataloader order.

    Raises:
        ValueError: If `dataloader` yields no batches (nothing to concatenate).
    """
    model.eval()
    y_true = []
    y_pred = []
    loss = 0.0
    # Pure inference: disabling autograd avoids building graphs for every batch.
    with torch.no_grad():
        for batch_input, batch_target in dataloader:
            batch_input = batch_input.to(device)
            batch_target = batch_target.to(device)
            predicted = model(batch_input)
            # Sum (rather than average) so differently sized batches weigh correctly.
            loss += F.nll_loss(predicted, batch_target, reduction='sum').item()
            y_true.append(batch_target.cpu().numpy())
            # The predicted class is the index of the max log-probability.
            y_pred.append(predicted.argmax(dim=1).cpu().numpy())
    y_true = np.concatenate(y_true, axis=0)
    y_pred = np.concatenate(y_pred, axis=0)
    return loss, y_true, y_pred


def get_eval_metrics(y_true, y_pred):
    """Return a dict with the 'accuracy' and weighted-average 'f1' of the predictions."""
    accuracy = accuracy_score(y_true=y_true, y_pred=y_pred)
    weighted_f1 = f1_score(y_true=y_true, y_pred=y_pred, average='weighted')
    return {'accuracy': accuracy, 'f1': weighted_f1}


def get_metrics(y_true, y_pred, prefix, loss=None):
    """Return evaluation metrics keyed as "<prefix>/<name>".

    When `loss` is given it is included first as "<prefix>/loss"; the remaining
    entries come from `get_eval_metrics`.
    """
    prefixed = {} if loss is None else {f"{prefix}/loss": loss}
    scores = get_eval_metrics(y_true=y_true, y_pred=y_pred)
    prefixed.update({f'{prefix}/{name}': value for name, value in scores.items()})
    return prefixed


def get_plots(y_true, y_pred, labels=None):
    """Render and return a classification-report heatmap and a confusion matrix.

    Both plots are displayed as a side effect (`plt.show()` / `cm_fig.show()`).

    Args:
        y_true: True class indices.
        y_pred: Predicted class indices.
        labels: Optional class names, used as report row names and as the
            confusion-matrix axis ticks.

    Returns:
        Tuple `(report_fig, cm_fig)`: a matplotlib figure and a plotly figure.
    """
    report = classification_report(y_true, y_pred, target_names=labels, output_dict=True)
    # Use a fresh figure per call. Clearing and reusing the "current" figure
    # makes successive calls return the same object on backends that keep the
    # figure open after show(), so an earlier report would be overwritten.
    report_fig, report_ax = plt.subplots()
    # iloc[:-1] drops the last metric row before plotting
    # (presumably the per-class support counts — confirm against sklearn's report layout).
    sns.heatmap(pd.DataFrame(report).iloc[:-1, :].T, annot=True, ax=report_ax)
    report_fig.tight_layout()
    plt.show()
    z = confusion_matrix(y_true=y_true, y_pred=y_pred)
    cm_fig = px.imshow(
        z,
        text_auto=True,
        aspect="auto",
        # Cell values are sample counts, so label the colour bar accordingly.
        labels=dict(x="Predicted Label", y="True Label", color="Count"),
        x=labels,
        y=labels,
        width=600,
        height=600
    )
    cm_fig.show()
    return report_fig, cm_fig


  
def get_images(dataset, sample, model, device, prefix):
    """Build `mlf.Image` objects for the `sample` positions of `dataset`.

    The model is run on the sampled subset, and each image is keyed and
    captioned "<prefix>_<sample position>", with its actual and predicted class
    names attached as class groups.

    Returns:
        Dict mapping "<prefix>_<sample position>" to the `mlf.Image`.
    """
    subset = Subset(dataset, sample)
    loader = make_dataloader(subset, batch_size=1000)
    _, actuals, predictions = get_y(model=model, device=device, dataloader=loader)
    logged = {}
    # The subset is iterated in the same order the loader visited it, so all
    # four sequences line up.
    for sample_no, (pixels, _), actual_idx, prediction_idx in zip(sample, subset, actuals, predictions):
        key = f"{prefix}_{sample_no}"
        logged[key] = mlf.Image(
            data_or_path=pixels.squeeze().numpy(),
            caption=key,
            class_groups={
                "actuals": idx2label[actual_idx],
                "predictions": idx2label[prediction_idx],
            },
        )
    return logged

## Log training hyperparameters

In [None]:
# Hyperparameters for this run, collected in one namespace so they can be
# logged together with run.log_params below.
args = SimpleNamespace(
    batch_size=64,  # batch size of the training dataloader
    test_batch_size=1000,  # batch size of the evaluation dataloaders
    epochs=1,  # number of passes of the training loop
    lr=1.0,  # learning rate passed to the Adadelta optimizer
    gamma=0.7,  # decay factor passed to the StepLR scheduler
    no_cuda=False,  # set True to force CPU even when CUDA is available
    seed=1,  # value passed to set_random_seed
    log_interval=100,  # step metrics are logged every this many optimizer steps
    save_model=True  # gates the "Log the Model" cell
)

run.log_params(vars(args))

## Initialize Model, DataLoaders, Loss, Optimizer

In [None]:
# Use the GPU only when it is both allowed (no_cuda is False) and available.
use_cuda = not args.no_cuda and torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")
num_classes = len(idx2label)
# Seed all RNGs before the model and the shuffling dataloader are created.
set_random_seed(args.seed)

In [None]:
# Build the three dataloaders: a shuffled one for optimisation, plus ordered
# loaders over the train and test sets that are used only for evaluation.
eval_loader_kwargs = dict(batch_size=args.test_batch_size, pin_memory=use_cuda, shuffle=False)
train_dataloader = make_dataloader(train_dataset, batch_size=args.batch_size, pin_memory=use_cuda, shuffle=True)
train_dataloader_for_eval = make_dataloader(train_dataset, **eval_loader_kwargs)
test_dataloader = make_dataloader(test_dataset, **eval_loader_kwargs)

In [None]:
# Initialize model and loss.
# NLLLoss pairs with the log-softmax output produced by Net.forward.
model = Net(num_classes=num_classes)
criterion = torch.nn.NLLLoss()

In [None]:
# NOTE(review): total_steps is not referenced anywhere else in the visible notebook.
total_steps = args.epochs * len(train_dataloader)
optimizer = optim.Adadelta(model.parameters(), lr=args.lr)
# step_size=1 with one scheduler.step() per epoch: the LR is multiplied by gamma every epoch.
scheduler = StepLR(optimizer, step_size=1, gamma=args.gamma)
model = model.to(device)
# Counts optimizer steps across epochs; used as the `step` of every logged item.
global_step = 0

## The Training loop
## Here we will log metrics, plots and sample images

In [None]:
# Train for args.epochs epochs. Metrics are logged every args.log_interval
# optimizer steps and at the end of each epoch; plots and sample images are
# logged once per epoch.
for epoch in tqdm(range(1, args.epochs + 1), desc="epochs"):
    epoch_start_time = timer()
    # loss.item() is already a Python float, so a plain float accumulator suffices.
    epoch_loss = 0.0
    for _step, (batch_input, batch_target) in tqdm(enumerate(train_dataloader), total=len(train_dataloader), desc="steps"):
        # get_y() below switches the model to eval mode, so re-enter train mode every step.
        model.train()
        batch_input = batch_input.to(device)
        batch_target = batch_target.to(device)
        batch_predicted = model(batch_input)
        loss = criterion(batch_predicted, batch_target)

        loss.backward()
        epoch_loss += loss.item()
        # Clip the global gradient norm to 1.0 before the update.
        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)

        optimizer.step()
        model.zero_grad()

        global_step += 1

        if global_step % args.log_interval == 0:
            #################### Logging Metrics ###############################
            step_metrics = {
                'step/lr': scheduler.get_last_lr()[0],
                'train/step/loss': loss.item(),
            }
            # Note: this evaluates the full train and test sets at every logging step.
            _, y_true_train, y_pred_train = get_y(model, device, train_dataloader_for_eval)
            step_metrics.update(get_metrics(y_true_train, y_pred_train, prefix="train/step"))
            test_loss, y_true_test, y_pred_test = get_y(model, device, test_dataloader)
            # get_y returns the loss summed over all samples; convert it to a
            # per-sample mean so it is comparable with the (mean) training loss.
            test_loss = test_loss / len(y_true_test)
            step_metrics.update(get_metrics(y_true_test, y_pred_test, prefix="test/step", loss=test_loss))

            print(f'epoch={epoch} step={global_step}', step_metrics)
            run.log_metrics(step_metrics, step=global_step)

    scheduler.step()

    ###################### Logging Metrics #####################################
    # Mean of the per-batch mean losses seen during this epoch.
    epoch_loss = epoch_loss / len(train_dataloader)
    epoch_time = timer() - epoch_start_time
    epoch_metrics = {
        'epoch/epoch': epoch,
        # Read after scheduler.step(), so this is the already-decayed learning rate.
        'epoch/lr': scheduler.get_last_lr()[0],
        'train/epoch/loss': epoch_loss,
        'epoch/time': epoch_time
    }
    _, y_true_train, y_pred_train = get_y(model, device, train_dataloader_for_eval)
    epoch_metrics.update(get_metrics(y_true_train, y_pred_train, prefix="train/epoch"))
    test_loss, y_true_test, y_pred_test = get_y(model, device, test_dataloader)
    # Same normalisation as for the step metrics: summed loss -> per-sample mean.
    test_loss = test_loss / len(y_true_test)
    epoch_metrics.update(get_metrics(y_true_test, y_pred_test, prefix="test/epoch", loss=test_loss))
    print(f'epoch={epoch} step={global_step}', epoch_metrics)

    run.log_metrics(epoch_metrics, step=global_step)

    ###################### Logging Plots #######################################
    train_report_plt, train_cm_plt = get_plots(y_true=y_true_train, y_pred=y_pred_train, labels=idx2label)
    test_report_plt, test_cm_plt = get_plots(y_true=y_true_test, y_pred=y_pred_test, labels=idx2label)
    plots = {
        'train_report': train_report_plt,
        'train_confusion_matrix': train_cm_plt,
        'test_report': test_report_plt,
        'test_confusion_matrix': test_cm_plt,
    }

    run.log_plots(plots, step=global_step)

    ###################### Logging Images ######################################
    train_images = get_images(dataset=train_dataset, sample=train_sample, model=model, device=device, prefix="train")
    test_images = get_images(dataset=test_dataset, sample=test_sample, model=model, device=device, prefix="test")
    images = {**train_images, **test_images}

    run.log_images(images, step=global_step)

## Log the Model

In [None]:
if args.save_model:
    # Log the model from the CPU (the serving scripts load it with a CPU map_location) ...
    model = model.to(torch.device("cpu"))
    run.log_model(model, framework="pytorch", step=global_step)
    # ... then move it back to the working device for any later cells.
    model = model.to(device)

## Log dataset stats with predictions

In [None]:
# One column per pixel of the 28x28 image, in row-major order.
columns = [f"pix_{i}_{j}" for i in range(28) for j in range(28)]
# Use dedicated names for these single-batch loaders. Reusing `train_dataloader`
# and `test_dataloader` would silently replace the loaders used by the training
# loop and break a re-run of that cell.
full_train_dataloader = make_dataloader(train_dataset, batch_size=len(train_dataset))
full_test_dataloader = make_dataloader(test_dataset, batch_size=len(test_dataset))
# batch_size == len(dataset), so the first batch is the entire (unshuffled) split.
X_train, _ = next(iter(full_train_dataloader))
X_test, _ = next(iter(full_test_dataloader))
# Flatten each image into one row of pixel features.
X_train = pd.DataFrame(X_train.reshape(len(X_train), -1).numpy(), columns=columns)
X_test = pd.DataFrame(X_test.reshape(len(X_test), -1).numpy(), columns=columns)

X_train.head()

In [None]:
# Log statistics (only_stats=True) for the first 500 rows of each split, with
# the predictions and actuals from the last evaluation of the training loop.
for split_name, split_features, split_predictions, split_actuals in (
    ('train', X_train, y_pred_train, y_true_train),
    ('test', X_test, y_pred_test, y_true_test),
):
    run.log_dataset(
        dataset_name=split_name,
        features=split_features[:500],
        predictions=split_predictions[:500],
        actuals=split_actuals[:500],
        only_stats=True,
    )

## End the run

In [None]:
run.end()

# Deployment 🚀

In [None]:
# Install the pinned dependencies for the deployment half of the notebook:
# the serving libraries first, then the servicefoundry and mlfoundry clients.
! pip install --quiet "torch>=1.2.0,<2.0.0" fastapi==0.78.0 python-multipart==0.0.5 scikit-image==0.19.3 gradio==3.0.24
! pip install --quiet -U "servicefoundry>=0.1.69,<0.2.0" "mlfoundry>=0.3.33,<0.4.0" 

In [None]:
import servicefoundry.core as sfy
# Log in before any of the deploy calls below.
sfy.login()

In [None]:
# Workspace passed to every sfy.Parameters(...) below.
# NOTE(review): this value is user-specific — replace it with your own workspace.
WORKSPACE = 'v1:local:chirag-1'

## Deploy web service from predict function

In [None]:
%%writefile predict.py

import os
import json
import torch
import numpy as np
import mlfoundry as mlf
from PIL import Image
from io import BytesIO
import fastapi
import skimage.transform

# Module-level setup, executed once when the service imports this file.
# The API key is read from the TFY_API_KEY environment variable (None if unset).
client = mlf.get_client(api_key=os.environ.get('TFY_API_KEY'))
# NOTE(review): hard-coded run identifier; the run created earlier in this notebook
# is named "pytorch-cnn" — confirm this points at the intended run.
run = client.get_run("chiragjn/fashion-mnist-demo-trial/pytorch-cnn-2")
model = run.get_model(map_location=torch.device('cpu'))

# config.json carries the "idx2label" list that maps class indices to names.
config_path = run.download_artifact("config.json")
with open(config_path) as f:
    config = json.load(f)

async def predict(image: fastapi.UploadFile = fastapi.File(...)):
    """Classify an uploaded image and return a score per label.

    Args:
        image: Uploaded image file in any format PIL can open.

    Returns:
        `{'predictions': {label: probability}}`, one entry per label in
        `config['idx2label']`.
    """
    data = await image.read()
    # Force a single channel: an RGB/RGBA upload would otherwise produce an
    # (H, W, C) array that cannot be reshaped to (1, 1, 28, 28) below.
    np_image = np.array(Image.open(BytesIO(data)).convert("L"))
    resized = skimage.transform.resize(np_image, (28, 28))
    batch = resized.reshape(1, 1, 28, 28)  # add batch and channel dimensions
    labels = config['idx2label']
    model.eval()
    with torch.no_grad():
        log_probs = model(torch.Tensor(batch))[0]
    # The model emits log-probabilities; exponentiate to get probabilities.
    scores = np.exp(log_probs.detach().numpy()).tolist()
    return {'predictions': dict(zip(labels, scores))}

In [None]:
# Collect the requirements for predict.py and print them for inspection.
requirements = sfy.gather_requirements("predict.py")
print(requirements)

In [None]:
# Describe the service: predict.py plus its requirements, with a name and target workspace.
fastapi_service = sfy.Service("predict.py", requirements, sfy.Parameters(
    name="fashion-mnist-fastapi-service",
    workspace=WORKSPACE,
))

In [None]:
# Deploy the service defined in the previous cell.
fastapi_service.deploy()

## Deploy model as a Gradio App

In [None]:
%%writefile webapp.py

import os
import json
import torch
import numpy as np
import pandas as pd
import mlfoundry as mlf
import gradio as gr

# Module-level setup, executed once when the app imports this file.
# The API key is read from the TFY_API_KEY environment variable (None if unset).
client = mlf.get_client(api_key=os.environ.get('TFY_API_KEY'))
# NOTE(review): hard-coded run identifier; the run created earlier in this notebook
# is named "pytorch-cnn" — confirm this points at the intended run.
run = client.get_run("chiragjn/fashion-mnist-demo-trial/pytorch-cnn-2")
model = run.get_model(map_location=torch.device('cpu'))

# config.json carries the "idx2label" list that maps class indices to names.
config_path = run.download_artifact("config.json")
with open(config_path) as f:
    config = json.load(f)

def classify(image):
    """Classify an image supplied by the Gradio image input.

    Args:
        image: Array indexed as (row, column, channel); only channel 0 is used.
            Expected to hold 28 * 28 pixels per channel.

    Returns:
        Dict mapping each label in `config['idx2label']` to its probability.
    """
    # Gradio hands over 8-bit pixel values in [0, 255], whereas the model was
    # trained on ToTensor-scaled inputs, so rescale to [0, 1] before inference.
    pixels = np.asarray(image[:, :, 0], dtype=np.float32) / 255.0
    batch = pixels.reshape(1, 1, 28, 28)  # add batch and channel dimensions
    labels = config['idx2label']
    model.eval()
    with torch.no_grad():
        log_probs = model(torch.Tensor(batch))[0]
    # The model emits log-probabilities; exponentiate to get probabilities.
    scores = np.exp(log_probs.detach().numpy()).tolist()
    return dict(zip(labels, scores))


# Image input configured with shape=(28, 28), matching the reshape done in classify().
inputs = gr.inputs.Image(shape=(28, 28))
# Label output configured with num_top_classes=10, the number of Fashion MNIST labels.
outputs = gr.outputs.Label(num_top_classes=10)

# Wire classify() to the input and output components.
app = gr.Interface(
    fn=classify, 
    inputs=inputs, 
    outputs=outputs, 
    title="Fashion MNIST Predictor"
)

In [None]:
# Collect the requirements for webapp.py and print them for inspection.
requirements = sfy.gather_requirements("webapp.py")
print(requirements)

In [None]:
# Describe the Gradio app: webapp.py plus its requirements, with a name and target workspace.
gradio_webapp = sfy.Gradio("webapp.py", requirements, sfy.Parameters(
    name="fashion-mnist-demo",
    workspace=WORKSPACE
))

In [None]:
# Deploy the Gradio app defined in the previous cell.
gradio_webapp.deploy()

## Deploy web service from predict function (image as a base64-encoded string)

In [None]:
%%writefile predict.py

import os
import json
import base64
import io
import numpy as np
from PIL import Image
import torch
import mlfoundry as mlf

# Module-level setup, executed once when the service imports this file.
# The API key is read from the TFY_API_KEY environment variable (None if unset).
client = mlf.get_client(api_key=os.environ.get('TFY_API_KEY'))
# NOTE(review): hard-coded run identifier; the run created earlier in this notebook
# is named "pytorch-cnn" — confirm this points at the intended run.
run = client.get_run("chiragjn/fashion-mnist-demo-trial/pytorch-cnn-2")
model = run.get_model(map_location=torch.device('cpu'))

# config.json carries the "idx2label" list that maps class indices to names.
config_path = run.download_artifact("config.json")
with open(config_path) as f:
    config = json.load(f)


def predict(image: str):
    """Classify a base64-encoded image and return a score per label.

    Args:
        image: Base64 encoding of an image file in any format PIL can open.

    Returns:
        Dict mapping each label in `config['idx2label']` to its probability.
    """
    data = base64.b64decode(image)
    # Normalise the input to what the model expects: one channel and 28x28
    # pixels. Without this, any colour or differently sized image fails in
    # the reshape below.
    pil_image = Image.open(io.BytesIO(data)).convert("L").resize((28, 28))
    # Rescale 8-bit pixel values to [0, 1] to match the ToTensor-scaled inputs
    # the model was trained on.
    pixels = np.asarray(pil_image, dtype=np.float32) / 255.0
    batch = pixels.reshape(1, 1, 28, 28)  # add batch and channel dimensions

    labels = config['idx2label']
    model.eval()
    with torch.no_grad():
        log_probs = model(torch.Tensor(batch))[0]
    # The model emits log-probabilities; exponentiate to get probabilities.
    scores = np.exp(log_probs.detach().numpy()).tolist()
    return dict(zip(labels, scores))

In [None]:
# Collect the requirements for the rewritten predict.py and print them for inspection.
requirements = sfy.gather_requirements("predict.py")
print(requirements)

In [None]:
# Describe the service: the string-input predict.py plus its requirements,
# with a name and target workspace.
auto_service = sfy.Service("predict.py", requirements, sfy.Parameters(
    name="fashion-mnist-service",
    workspace=WORKSPACE,
))

In [None]:
# Deploy the service defined in the previous cell.
auto_service.deploy()