In [14]:
from pathlib import Path
from image_modalities_classifier.models.train import ModalityModelTrainer

In [15]:
# --- Experiment configuration -------------------------------------------------
# Everything produced or consumed by this run lives under the user's home folder.
artifacts_folder = Path.home().joinpath("Documents/modality_classifiers")

# Identifiers handed to the trainer helper in the next cell.
taxonomy = "cord19"
project = "cord19"
classifier_name = "microscopy"

# Inputs: the parquet dataset and the root folder holding the images.
# NOTE(review): `base_img_dir` is a machine-specific absolute path; adjust it
# when running on a different host.
dataset_filepath = artifacts_folder.joinpath(
    "data/updated_cord19/cord19_microscopy_v1.parquet"
)
base_img_dir = "/media/cumulus/curation_data"

# Output location handed to the trainer helper.
output_dir = artifacts_folder.joinpath("models")

# Placeholder value; a later cell reassigns `num_classes` from the fitted label
# encoder before it is used to build the model.
num_classes = 3

In [16]:
# Build the project's training helper from the configuration values above.
# Arguments are passed positionally, in the order the original call used.
# NOTE(review): `epochs=10` is requested here, but the Lightning `Trainer`
# created later in this notebook is capped at `max_epochs=5` — confirm which
# value is intended.
trainer = ModalityModelTrainer(
    dataset_filepath,
    base_img_dir,
    output_dir,
    taxonomy,
    classifier_name,
    project,
    epochs=10,
)

In [17]:
# NOTE(review): this import belongs with the other imports in the first cell.
import torch
# The two calls below use the trainer's private (underscore-prefixed) helpers
# directly instead of its public entry point, so each stage can be driven by
# hand from the notebook. Their order is kept as written; presumably the
# artifacts folder depends on state set up by the data preparation — confirm.
trainer._prepare_data()
trainer._create_artifacts_folder()

# Hard-coded three-element statistics for the training set (presumably one
# value per RGB channel, obtained from an earlier run of the commented-out
# `_calculate_dataset_stats()` call — confirm). They are passed to both the
# data module and the model in later cells.
train_std = torch.tensor([0.3616, 0.3386, 0.3557])
train_mean = torch.tensor([0.5411, 0.4884, 0.5339])

In [18]:
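# Optional: uncomment to recompute the statistics from the dataset instead of
# using the hard-coded tensors defined above (presumably slow — confirm).
# train_mean, train_std = trainer._calculate_dataset_stats()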

In [19]:
# Echo the mean tensor as a quick sanity check of the values in use.
train_mean

tensor([0.5411, 0.4884, 0.5339])

In [20]:
# Build the data module from the dataset statistics. The recorded output shows
# the global random seed being set (443) as part of this call.
datamodule = trainer._create_data_module(train_mean, train_std)

Global seed set to 443


In [21]:
from pytorch_lightning import Trainer, seed_everything
from pytorch_lightning.callbacks import LearningRateMonitor, ModelCheckpoint
from pytorch_lightning.callbacks.early_stopping import EarlyStopping

# Values read from the ModalityModelTrainer helper.
output_run_path = trainer.output_dir
num_classes = len(trainer.encoder.classes_)  # one class per encoder label

# Logs the learning rate once per epoch.
# NOTE(review): this callback is created but is not part of the `callbacks`
# list given to the Lightning Trainer later in the notebook.
lr_monitor = LearningRateMonitor(logging_interval="epoch")

# Keep only the single best checkpoint according to validation loss, written to
# the run's output directory as "<classifier>_<version>".
checkpoint_callback = ModelCheckpoint(
    monitor="val_loss",
    mode=trainer.mode,
    save_top_k=1,
    dirpath=output_run_path,
    filename=f"{trainer.classifier}_{trainer.version}",
)

# Stop training once validation loss has failed to improve for 5 consecutive
# checks; any improvement counts because min_delta is 0.
early_stop_callback = EarlyStopping(
    monitor="val_loss",
    mode=trainer.mode,
    patience=5,
    min_delta=0.0,
    verbose=True,
)

In [22]:
# Echo the class count derived from the label encoder (recorded output: 3).
num_classes

3

In [23]:
from image_modalities_classifier.models.resnet import Resnet

# Instantiate the project's ResNet wrapper. The fit summary recorded further
# down reports a CrossEntropyLoss and all 11.2 M parameters as trainable.
model = Resnet(
    # Architecture
    name=trainer.model_name,
    num_classes=num_classes,
    pretrained=True,
    fine_tuned_from="whole",
    # Optimisation: the scheduler tracks the same metric as the callbacks.
    lr=trainer.learning_rate,
    metric_monitor="val_loss",
    mode_scheduler=trainer.mode,
    # Dataset-derived values
    class_weights=datamodule.class_weights,
    mean_dataset=train_mean,
    std_dataset=train_std,
)

In [24]:
# Display the module tree to inspect the instantiated architecture.
model

Resnet(
  (model): ResNet(
    (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (layer1): Sequential(
      (0): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
      (1): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_runnin

In [25]:
# Configure the PyTorch Lightning trainer that runs the optimisation loop.
#
# NOTE(review): this rebinds `trainer`, which until this cell referred to the
# ModalityModelTrainer helper. Re-running earlier cells that read
# `trainer.mode`, `trainer.output_dir`, etc. after this cell will fail; restart
# the kernel and run top to bottom, or give one of the two objects its own name.
#
# NOTE(review): training is capped at 5 epochs while the early-stopping
# callback uses patience=5, so early stopping is unlikely to fire before the
# epoch limit is reached — confirm this is intended.
callbacks = [checkpoint_callback, early_stop_callback]
trainer = Trainer(
    # `accelerator`/`devices` replaces the deprecated `gpus=1` argument, which
    # is most likely what triggered the deprecation warning recorded in this
    # cell's output.
    accelerator="gpu",
    devices=1,
    max_epochs=5,
    callbacks=callbacks,
    deterministic=True,
    logger=None,  # no experiment logger is attached for this run
    num_sanity_val_steps=0,  # skip the validation sanity check before training
)

  rank_zero_deprecation(
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


In [26]:
# Materialise the loaders for interactive inspection. They are not passed to
# `fit`, which receives the data module itself.
train_dataloader = datamodule.train_dataloader()
val_dataloader = datamodule.val_dataloader()

# Run training; the recorded output shows it stopping at the `max_epochs=5` limit.
trainer.fit(model, datamodule=datamodule)

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name  | Type             | Params
-------------------------------------------
0 | model | ResNet           | 11.2 M
1 | loss  | CrossEntropyLoss | 0     
-------------------------------------------
11.2 M    Trainable params
0         Non-trainable params
11.2 M    Total params
44.712    Total estimated model params size (MB)


Epoch 0: 100%|██████████| 347/347 [00:34<00:00,  9.96it/s, loss=0.0894]

Metric val_loss improved. New best score: 0.297


Epoch 2: 100%|██████████| 347/347 [00:33<00:00, 10.37it/s, loss=0.0697]

Metric val_loss improved by 0.074 >= min_delta = 0.0. New best score: 0.223


Epoch 4: 100%|██████████| 347/347 [00:33<00:00, 10.38it/s, loss=0.0321]

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|██████████| 347/347 [00:33<00:00, 10.38it/s, loss=0.0321]
