In [1]:
import timm

timm.list_models("resnet50")

['resnet50']

In [2]:
resnet50 = timm.create_model("resnet50")
efficientnet_b0 = timm.create_model("efficientnet_b0")

In [3]:
def count_parameters(model):
    """Return the trainable parameter count of ``model`` in units of 2**20.

    Only parameters whose ``requires_grad`` flag is set are counted. The raw
    element total is divided by 1024 twice, so the float returned is in "Mi"
    (1,048,576) parameters, not in decimal millions.
    """
    trainable = 0
    for param in model.parameters():
        if param.requires_grad:
            trainable += param.numel()
    return trainable / 1024 / 1024

count_parameters(resnet50)

24.373085021972656

In [4]:
count_parameters(efficientnet_b0)

5.043552398681641

In [3]:
import torch

x = torch.randn(12, 3, 224, 224)

In [6]:
%timeit resnet50(x)

927 ms ± 19 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [7]:
efficientnet_v2 = timm.create_model("efficientnetv2_s")

In [8]:
count_parameters(efficientnet_v2)

20.46440887451172

In [9]:
%timeit efficientnet_v2(x)

1.22 s ± 28.5 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [10]:
efficientnet_b2 = timm.create_model("efficientnet_b2")
count_parameters(efficientnet_b2)

8.687967300415039

In [11]:
%timeit efficientnet_b2(x)

839 ms ± 27.3 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [12]:
resnet50

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (act1): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (act1): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (drop_block): Identity()
      (act2): ReLU(inplace=True)
      (aa): Identity()
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
     

In [14]:
dir(resnet50)

['T_destination',
 '__annotations__',
 '__call__',
 '__class__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattr__',
 '__getattribute__',
 '__getstate__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__setstate__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 '_apply',
 '_backward_hooks',
 '_backward_pre_hooks',
 '_buffers',
 '_call_impl',
 '_compiled_call_impl',
 '_forward_hooks',
 '_forward_hooks_always_called',
 '_forward_hooks_with_kwargs',
 '_forward_pre_hooks',
 '_forward_pre_hooks_with_kwargs',
 '_get_backward_hooks',
 '_get_backward_pre_hooks',
 '_get_name',
 '_is_full_backward_hook',
 '_load_from_state_dict',
 '_load_state_dict_post_hooks',
 '_load_state_dict_pre_hooks',
 '_maybe_warn_non_full_backward_hook',
 '_modules',
 '_named_members',
 '_non_persistent_buffers_se

In [16]:
[x[0] for x in resnet50.named_parameters()]

['conv1.weight',
 'bn1.weight',
 'bn1.bias',
 'layer1.0.conv1.weight',
 'layer1.0.bn1.weight',
 'layer1.0.bn1.bias',
 'layer1.0.conv2.weight',
 'layer1.0.bn2.weight',
 'layer1.0.bn2.bias',
 'layer1.0.conv3.weight',
 'layer1.0.bn3.weight',
 'layer1.0.bn3.bias',
 'layer1.0.downsample.0.weight',
 'layer1.0.downsample.1.weight',
 'layer1.0.downsample.1.bias',
 'layer1.1.conv1.weight',
 'layer1.1.bn1.weight',
 'layer1.1.bn1.bias',
 'layer1.1.conv2.weight',
 'layer1.1.bn2.weight',
 'layer1.1.bn2.bias',
 'layer1.1.conv3.weight',
 'layer1.1.bn3.weight',
 'layer1.1.bn3.bias',
 'layer1.2.conv1.weight',
 'layer1.2.bn1.weight',
 'layer1.2.bn1.bias',
 'layer1.2.conv2.weight',
 'layer1.2.bn2.weight',
 'layer1.2.bn2.bias',
 'layer1.2.conv3.weight',
 'layer1.2.bn3.weight',
 'layer1.2.bn3.bias',
 'layer2.0.conv1.weight',
 'layer2.0.bn1.weight',
 'layer2.0.bn1.bias',
 'layer2.0.conv2.weight',
 'layer2.0.bn2.weight',
 'layer2.0.bn2.bias',
 'layer2.0.conv3.weight',
 'layer2.0.bn3.weight',
 'layer2.0.bn3.b

In [19]:
dir(next(resnet50.named_parameters())[1])

['H',
 'T',
 '__abs__',
 '__add__',
 '__and__',
 '__array__',
 '__array_priority__',
 '__array_wrap__',
 '__bool__',
 '__class__',
 '__complex__',
 '__contains__',
 '__deepcopy__',
 '__delattr__',
 '__delitem__',
 '__dict__',
 '__dir__',
 '__div__',
 '__dlpack__',
 '__dlpack_device__',
 '__doc__',
 '__eq__',
 '__float__',
 '__floordiv__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__getitem__',
 '__gt__',
 '__hash__',
 '__iadd__',
 '__iand__',
 '__idiv__',
 '__ifloordiv__',
 '__ilshift__',
 '__imod__',
 '__imul__',
 '__index__',
 '__init__',
 '__init_subclass__',
 '__int__',
 '__invert__',
 '__ior__',
 '__ipow__',
 '__irshift__',
 '__isub__',
 '__iter__',
 '__itruediv__',
 '__ixor__',
 '__le__',
 '__len__',
 '__long__',
 '__lshift__',
 '__lt__',
 '__matmul__',
 '__mod__',
 '__module__',
 '__mul__',
 '__ne__',
 '__neg__',
 '__new__',
 '__nonzero__',
 '__or__',
 '__pos__',
 '__pow__',
 '__radd__',
 '__rand__',
 '__rdiv__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__reversed_

In [10]:
a = torch.randn((10, 10))
a[:5].shape

torch.Size([5, 10])

In [8]:
x1 = resnet50.forward_features(x)
x1.shape

torch.Size([12, 2048, 7, 7])

In [9]:
resnet50.global_pool(x1).shape

torch.Size([12, 2048])

In [10]:
resnet50.num_features

2048

In [11]:
[m for m, k in resnet50.named_parameters()]

['conv1.weight',
 'bn1.weight',
 'bn1.bias',
 'layer1.0.conv1.weight',
 'layer1.0.bn1.weight',
 'layer1.0.bn1.bias',
 'layer1.0.conv2.weight',
 'layer1.0.bn2.weight',
 'layer1.0.bn2.bias',
 'layer1.0.conv3.weight',
 'layer1.0.bn3.weight',
 'layer1.0.bn3.bias',
 'layer1.0.downsample.0.weight',
 'layer1.0.downsample.1.weight',
 'layer1.0.downsample.1.bias',
 'layer1.1.conv1.weight',
 'layer1.1.bn1.weight',
 'layer1.1.bn1.bias',
 'layer1.1.conv2.weight',
 'layer1.1.bn2.weight',
 'layer1.1.bn2.bias',
 'layer1.1.conv3.weight',
 'layer1.1.bn3.weight',
 'layer1.1.bn3.bias',
 'layer1.2.conv1.weight',
 'layer1.2.bn1.weight',
 'layer1.2.bn1.bias',
 'layer1.2.conv2.weight',
 'layer1.2.bn2.weight',
 'layer1.2.bn2.bias',
 'layer1.2.conv3.weight',
 'layer1.2.bn3.weight',
 'layer1.2.bn3.bias',
 'layer2.0.conv1.weight',
 'layer2.0.bn1.weight',
 'layer2.0.bn1.bias',
 'layer2.0.conv2.weight',
 'layer2.0.bn2.weight',
 'layer2.0.bn2.bias',
 'layer2.0.conv3.weight',
 'layer2.0.bn3.weight',
 'layer2.0.bn3.b

In [15]:
[(m, k) for m, k in resnet50.named_modules()]

[('',
  ResNet(
    (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (act1): ReLU(inplace=True)
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (layer1): Sequential(
      (0): Bottleneck(
        (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act1): ReLU(inplace=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (drop_block): Identity()
        (act2): ReLU(inplace=True)
        (aa): Identity()
        (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affi

In [13]:
dir(resnet50)

['T_destination',
 '__annotations__',
 '__call__',
 '__class__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattr__',
 '__getattribute__',
 '__getstate__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__setstate__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 '_apply',
 '_backward_hooks',
 '_backward_pre_hooks',
 '_buffers',
 '_call_impl',
 '_compiled_call_impl',
 '_forward_hooks',
 '_forward_hooks_always_called',
 '_forward_hooks_with_kwargs',
 '_forward_pre_hooks',
 '_forward_pre_hooks_with_kwargs',
 '_get_backward_hooks',
 '_get_backward_pre_hooks',
 '_get_name',
 '_is_full_backward_hook',
 '_load_from_state_dict',
 '_load_state_dict_post_hooks',
 '_load_state_dict_pre_hooks',
 '_maybe_warn_non_full_backward_hook',
 '_modules',
 '_named_members',
 '_non_persistent_buffers_se

In [18]:
[(name, m) for name, m in resnet50.named_modules()][0]

[('',
  ResNet(
    (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (act1): ReLU(inplace=True)
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (layer1): Sequential(
      (0): Bottleneck(
        (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act1): ReLU(inplace=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (drop_block): Identity()
        (act2): ReLU(inplace=True)
        (aa): Identity()
        (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affi

In [19]:
list(resnet50.named_parameters())

[('conv1.weight',
  Parameter containing:
  tensor([[[[ 2.0205e-02, -2.3905e-03, -2.0692e-03,  ..., -2.1685e-02,
             -2.6140e-02,  3.5137e-02],
            [ 1.1919e-02, -3.9269e-02,  1.0223e-02,  ...,  1.5147e-02,
              2.0462e-02,  3.0057e-02],
            [-5.4055e-06,  3.5655e-02, -3.1106e-02,  ...,  2.5908e-02,
             -8.2757e-03,  9.2408e-03],
            ...,
            [ 3.1922e-03, -2.9917e-02, -6.8023e-04,  ...,  8.9421e-03,
              2.0288e-02, -1.6921e-02],
            [ 9.2214e-03,  2.8107e-02, -3.1560e-03,  ...,  5.4679e-02,
              4.0747e-02,  7.1635e-03],
            [-1.4354e-02,  1.2380e-02,  3.8668e-03,  ...,  1.0404e-02,
              2.7850e-03, -2.4757e-02]],
  
           [[ 1.5083e-02, -4.7006e-02,  1.2246e-03,  ..., -1.4765e-02,
              4.3166e-02,  1.3303e-02],
            [ 3.0001e-02, -1.1653e-02,  2.1985e-02,  ..., -1.8013e-03,
              1.0435e-02, -1.1959e-02],
            [ 7.8141e-03, -1.2133e-02,  1.5068e-0

In [26]:
bn = nn.BatchNorm2d(123)
list(bn.named_children())

[]

In [30]:
# Pitfall demo: `[[]] * 3` repeats a reference to ONE inner list three times,
# so an append made through l[0] is visible through every slot (see output).
# Use `[[] for _ in range(3)]` when independent lists are needed.
l = [[]] * 3
l[0].append(4)
l

[[4], [4], [4]]

In [8]:
import torch.nn as nn

def bn_filter(module_name, module, param_name, param):
    """Tell whether ``module`` is a batch-norm or instance-norm layer.

    Follows the four-argument filter signature used by ``split_params``;
    only ``module`` is inspected, the other arguments are ignored.

    Returns:
        bool: True when ``module`` is an instance of any (lazy or regular)
        1d/2d/3d ``BatchNorm`` or ``InstanceNorm`` class from ``torch.nn``.
    """
    norm_layer_types = (
        nn.BatchNorm1d,
        nn.BatchNorm2d,
        nn.BatchNorm3d,
        nn.LazyBatchNorm1d,
        nn.LazyBatchNorm2d,
        nn.LazyBatchNorm3d,
        nn.InstanceNorm1d,
        nn.InstanceNorm2d,
        nn.InstanceNorm3d,
        nn.LazyInstanceNorm1d,
        nn.LazyInstanceNorm2d,
        nn.LazyInstanceNorm3d,
    )
    return isinstance(module, norm_layer_types)

def bias_filter(module_name, module, param_name, param):
    """Select parameters registered on their module under the exact name ``"bias"``.

    Follows the four-argument filter signature used by ``split_params``;
    only ``param_name`` is inspected.
    """
    is_bias = param_name == "bias"
    return is_bias

def bn_or_bias_filter(module_name, module, param_name, param):
    """Match a parameter accepted by ``bn_filter`` or, failing that, by ``bias_filter``.

    ``bias_filter`` is only consulted when ``bn_filter`` rejects the parameter.
    """
    args = (module_name, module, param_name, param)
    matched = bn_filter(*args)
    if not matched:
        matched = bias_filter(*args)
    return matched

def pass_all_filter(module_name, module, param_name, param):
    """Catch-all filter: accepts every parameter regardless of its arguments.

    Intended as the last entry of a filter list so that anything not claimed
    by an earlier filter still lands in a group.
    """
    return True

def _split_params_into(module, module_name, child_prefix, filters, results, seen):
    """Route ``module``'s own parameters, then those of its descendants, into ``results``."""
    for param_name, param in module.named_parameters(recurse=False):
        # A tensor shared by several modules (tied weights) must reach the
        # optimizer only once, mirroring the de-duplication done by
        # ``Module.named_parameters()``.
        if id(param) in seen:
            continue
        seen.add(id(param))
        # First matching filter wins; a parameter no filter accepts is dropped.
        for bucket, accepts in zip(results, filters):
            if accepts(module_name, module, param_name, param):
                bucket.append(param)
                break
    for child_name, child in module.named_children():
        child_full_name = child_prefix + child_name
        _split_params_into(
            child, child_full_name, child_full_name + ".", filters, results, seen
        )


def split_params(model: nn.Module, filters, prefix=""):
    """Partition the parameters of ``model`` into one list per filter.

    The module tree is walked depth-first. Each parameter is offered to the
    filters in order and appended to the list of the first filter that
    returns a truthy value.

    Unlike a walk over ``named_children()`` alone, parameters registered
    directly on ``model`` itself are included. This matters for leaf modules
    such as a bare ``nn.Linear`` and for models that keep tensors on the root.

    Args:
        model: Root module whose parameters are split.
        filters: Sequence of callables
            ``f(module_name, module, param_name, param) -> bool``.
            ``module_name`` is the dotted path of the owning module
            (``prefix`` + child names), ``param_name`` the attribute name on
            that module.
        prefix: String prepended to every child module name. The root module
            itself is reported under ``prefix`` without its trailing dot.

    Returns:
        list[list[Parameter]]: ``len(filters)`` lists, aligned with ``filters``.
    """
    # Independent lists per filter (``[[]] * n`` would alias a single list).
    results = [[] for _ in filters]
    root_name = prefix[:-1] if prefix.endswith(".") else prefix
    _split_params_into(model, root_name, prefix, filters, results, set())
    return results

def add_weight_decay(model, weight_decay=1e-5):
    """Build two optimizer parameter groups for ``model``.

    Parameters accepted by ``bn_or_bias_filter`` go into the first group with
    weight decay disabled; everything else goes into the second group with
    ``weight_decay``.

    Returns:
        list[dict]: ``[{'params': ..., 'weight_decay': 0.}, {'params': ..., 'weight_decay': weight_decay}]``
    """
    no_decay, decay = split_params(model, [bn_or_bias_filter, pass_all_filter])
    groups = []
    groups.append({'params': no_decay, 'weight_decay': 0.})
    groups.append({'params': decay, 'weight_decay': weight_decay})
    return groups

# result = split_params(resnet50, [bn_or_bias_filter, pass_all_filter])

In [9]:
from torch.optim import AdamW

AdamW(add_weight_decay(resnet50), lr=0.001)

AdamW (
Parameter Group 0
    amsgrad: False
    betas: (0.9, 0.999)
    capturable: False
    differentiable: False
    eps: 1e-08
    foreach: None
    fused: None
    lr: 0.001
    maximize: False
    weight_decay: 0.0

Parameter Group 1
    amsgrad: False
    betas: (0.9, 0.999)
    capturable: False
    differentiable: False
    eps: 1e-08
    foreach: None
    fused: None
    lr: 0.001
    maximize: False
    weight_decay: 1e-05
)

In [7]:
[m[0] for m in result[1]]

['conv1.weight',
 'layer1.0.conv1.weight',
 'layer1.0.conv2.weight',
 'layer1.0.conv3.weight',
 'layer1.0.downsample.0.weight',
 'layer1.1.conv1.weight',
 'layer1.1.conv2.weight',
 'layer1.1.conv3.weight',
 'layer1.2.conv1.weight',
 'layer1.2.conv2.weight',
 'layer1.2.conv3.weight',
 'layer2.0.conv1.weight',
 'layer2.0.conv2.weight',
 'layer2.0.conv3.weight',
 'layer2.0.downsample.0.weight',
 'layer2.1.conv1.weight',
 'layer2.1.conv2.weight',
 'layer2.1.conv3.weight',
 'layer2.2.conv1.weight',
 'layer2.2.conv2.weight',
 'layer2.2.conv3.weight',
 'layer2.3.conv1.weight',
 'layer2.3.conv2.weight',
 'layer2.3.conv3.weight',
 'layer3.0.conv1.weight',
 'layer3.0.conv2.weight',
 'layer3.0.conv3.weight',
 'layer3.0.downsample.0.weight',
 'layer3.1.conv1.weight',
 'layer3.1.conv2.weight',
 'layer3.1.conv3.weight',
 'layer3.2.conv1.weight',
 'layer3.2.conv2.weight',
 'layer3.2.conv3.weight',
 'layer3.3.conv1.weight',
 'layer3.3.conv2.weight',
 'layer3.3.conv3.weight',
 'layer3.4.conv1.weight',


In [31]:
list(resnet50.named_parameters(recurse=False))

[]

In [25]:
result[1]["param_names"]

['.conv1.weight',
 '.bn1.weight',
 '.bn1.bias',
 '.layer1.0.conv1.weight',
 '.layer1.0.bn1.weight',
 '.layer1.0.bn1.bias',
 '.layer1.0.conv2.weight',
 '.layer1.0.bn2.weight',
 '.layer1.0.bn2.bias',
 '.layer1.0.conv3.weight',
 '.layer1.0.bn3.weight',
 '.layer1.0.bn3.bias',
 '.layer1.0.downsample.0.weight',
 '.layer1.0.downsample.1.weight',
 '.layer1.0.downsample.1.bias',
 '.layer1.1.conv1.weight',
 '.layer1.1.bn1.weight',
 '.layer1.1.bn1.bias',
 '.layer1.1.conv2.weight',
 '.layer1.1.bn2.weight',
 '.layer1.1.bn2.bias',
 '.layer1.1.conv3.weight',
 '.layer1.1.bn3.weight',
 '.layer1.1.bn3.bias',
 '.layer1.2.conv1.weight',
 '.layer1.2.bn1.weight',
 '.layer1.2.bn1.bias',
 '.layer1.2.conv2.weight',
 '.layer1.2.bn2.weight',
 '.layer1.2.bn2.bias',
 '.layer1.2.conv3.weight',
 '.layer1.2.bn3.weight',
 '.layer1.2.bn3.bias',
 '.layer2.0.conv1.weight',
 '.layer2.0.bn1.weight',
 '.layer2.0.bn1.bias',
 '.layer2.0.conv2.weight',
 '.layer2.0.bn2.weight',
 '.layer2.0.bn2.bias',
 '.layer2.0.conv3.weight',


In [19]:
# Sanity check of a "-2 * mean cosine similarity" expression: y is a positive
# multiple of x, so every row pair has cosine 1 and the result is -2.
# NOTE: this rebinds the notebook-level name `x`.
x = torch.randn(12, 100)
y = x * 10
x_norm = torch.linalg.norm(x, ord=2, dim=1)
y_norm = torch.linalg.norm(y, ord=2, dim=1)
cosine = (x * y).sum(dim=1) / (x_norm * y_norm)
-2 * cosine.mean()

tensor(-2.)

In [31]:
from typing import List

import lightning.pytorch as pl
import torch.nn as nn
import torch.nn.functional as F
import torch.optim
import torchmetrics
from tqdm import tqdm

import models
import utils
from datamodules import Task1Datamodule


class ClassificationCallback(pl.Callback):
    """Linear-probe evaluation run at the end of every validation epoch.

    For each learning rate in ``lr`` a fresh ``nn.Linear`` classifier is
    trained on features produced by the backbone (the backbone forward runs
    under ``torch.no_grad()``, so only the classifier is optimized). The
    metrics of the epoch with the lowest validation loss are logged under the
    ``classification/`` prefix.

    Args:
        datamodule: Source of ``train_dataloader()`` / ``val_dataloader()``.
            Batches are unpacked as ``(data, labels)`` and the backbone is fed
            ``data["image"]``.
        num_epochs: Maximum number of probe-training epochs per learning rate.
        lr: Learning rates to try; defaults to ``[1e-3]``.
        tclip: Whether logits are soft-clipped with a scaled tanh.
        tclip_alpha: Scale of the tanh clipping.
        weight_decay: Forwarded to ``utils.add_weight_decay``.
        early_stopping: Patience; a learning-rate run stops once more than
            this many epochs have passed since its best validation loss.
    """

    def __init__(
        self,
        datamodule: Task1Datamodule,
        num_epochs: int = 80,
        lr: List[float] = None,
        tclip: bool = True,
        tclip_alpha: float = 10.0,
        weight_decay: float = 1e-2,
        early_stopping: int = 10,
    ):
        super().__init__()
        if lr is None:
            lr = [1e-3]

        self.datamodule = datamodule
        self.num_epochs = num_epochs
        self.lr = lr
        self.tclip = tclip
        self.tclip_alpha = tclip_alpha
        self.weight_decay = weight_decay
        self.early_stopping = early_stopping

    def _clip_logits(self, logits):
        """Return ``tclip_alpha * tanh(logits / tclip_alpha)`` when ``tclip`` is on, else ``logits``."""
        if not self.tclip:
            return logits
        return self.tclip_alpha * torch.tanh(logits / self.tclip_alpha)

    def eval_model(self, backbone: nn.Module, backbone_output_size: int):
        """Train linear probes on ``backbone`` features and report the best one.

        Args:
            backbone: Feature extractor; its output is fed to an
                ``nn.Linear(backbone_output_size, models.NUM_CLASSES)``.
            backbone_output_size: Width of the feature vector the backbone emits.

        Returns:
            dict with keys ``loss``, ``lr``, ``epoch``, ``acc``, ``ap``
            describing the epoch with the lowest mean validation loss over all
            learning rates. ``lr``/``epoch``/``acc``/``ap`` stay ``None`` and
            ``loss`` stays ``inf`` if no epoch produced a finite-comparable loss.

        NOTE(review): the classifier and metrics are created on the default
        device and batches are used exactly as the dataloaders yield them.
        Confirm devices match when the backbone lives on an accelerator.
        """
        acc = torchmetrics.Accuracy("multiclass", num_classes=models.NUM_CLASSES)
        ap = torchmetrics.AveragePrecision("multiclass", num_classes=models.NUM_CLASSES)
        # Lower is better, so the running best must start at +inf. A negative
        # sentinel can never be beaten by a (non-negative) cross-entropy loss.
        best_loss = float("inf")
        best_lr = None
        best_epoch = None
        best_acc = None
        best_ap = None
        for lr in self.lr:
            fc = nn.Linear(backbone_output_size, models.NUM_CLASSES)
            opt = torch.optim.AdamW(
                utils.add_weight_decay(fc, self.weight_decay), lr=lr
            )
            # Per-learning-rate best, used only for early stopping.
            best_lr_loss = float("inf")
            best_lr_epoch = -1

            train_dataloader = self.datamodule.train_dataloader()
            val_dataloader = self.datamodule.val_dataloader()
            if isinstance(val_dataloader, list):
                assert len(val_dataloader) == 1
                val_dataloader = val_dataloader[0]
            tq = tqdm(range(self.num_epochs))
            for epoch in tq:
                # ---- train the probe for one epoch ----
                num_batches = 0
                for data, labels in train_dataloader:
                    num_batches += 1
                    opt.zero_grad()
                    # The backbone is frozen: no graph is built for it.
                    with torch.no_grad():
                        features = backbone(data["image"])
                    logits = self._clip_logits(fc(features))
                    loss = F.cross_entropy(logits, labels)
                    loss.backward()
                    opt.step()
                    tq.set_description(
                        f"Train step {num_batches:04d}/{len(train_dataloader):04d} - loss {loss:.6f}"
                    )

                # ---- validate ----
                loss_sum = 0
                num_batches = 0
                for data, labels in val_dataloader:
                    num_batches += 1
                    with torch.no_grad():
                        features = backbone(data["image"])
                        logits = self._clip_logits(fc(features))
                        loss_sum += F.cross_entropy(logits, labels)
                        acc.update(logits, labels)
                        ap.update(logits, labels)
                    # The progress bar shows the running SUM of batch losses.
                    tq.set_description(
                        f"Eval step {num_batches:04d}/{len(val_dataloader):04d} - loss {loss_sum:.6f}"
                    )
                val_loss = loss_sum / num_batches
                val_acc = acc.compute()
                val_ap = ap.compute()
                # Metrics are shared across epochs and learning rates.
                acc.reset()
                ap.reset()
                if val_loss < best_loss:
                    best_loss = val_loss
                    best_lr = lr
                    best_epoch = epoch
                    best_acc = val_acc
                    best_ap = val_ap
                if val_loss < best_lr_loss:
                    best_lr_loss = val_loss
                    best_lr_epoch = epoch
                elif epoch - best_lr_epoch > self.early_stopping:
                    break
        return {
            "loss": best_loss,
            "lr": best_lr,
            "epoch": best_epoch,
            "acc": best_acc,
            "ap": best_ap,
        }

    def on_validation_epoch_end(
        self, trainer: pl.Trainer, pl_module: pl.LightningModule
    ):
        """Probe ``pl_module``'s online backbone and log the resulting metrics."""
        backbone = nn.Sequential(pl_module.online_backbone, pl_module.global_pool)

        logs = self.eval_model(backbone, pl_module.hparams.mlp_out_size)

        # Entries left at None (no epoch was recorded) cannot be logged.
        pl_module.log_dict(
            {f"classification/{k}": v for k, v in logs.items() if v is not None}
        )
        print(f"Classification {logs}")

import datamodules

datamodule = datamodules.Task1Datamodule("C:/Data/AAIT/task1", num_train_workers=0, num_val_workers=0, num_test_workers=0, batch_size=64, labeled=True, unlabeled=False, val_size=0.2, train_dataset_replicas=2)
datamodule.setup("fit")

callback = ClassificationCallback(datamodule, num_epochs=10, lr=[0.001], tclip=True, tclip_alpha=10., weight_decay=1e-2, early_stopping=10)

class _Backbone(nn.Module):
    def __init__(self, model: nn.Module):
        super().__init__()
        self.backbone = model

    def forward(self, x):
        x = self.backbone.forward_features(x)
        x = self.backbone.global_pool(x)
        return x

resnet50 = timm.create_model("resnet50", pretrained=True)
backbone = _Backbone(resnet50)

callback.eval_model(backbone, 2048)

Train step 0142/2356 - loss 4.558164:   0%|          | 0/10 [00:25<?, ?it/s]


KeyboardInterrupt: 

  0%|          | 0/10 [00:00<?, ?it/s]


RuntimeError: mat1 and mat2 shapes cannot be multiplied (16x1000 and 2048x100)

In [71]:
import torch
import numpy as np
# from sklearn.ensemble import RandomForestClassifier
# from sklearn.neighbors import KNeighborsClassifier
# from sklearn.preprocessing import StandardScaler
# from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split

n = 23555
m = 2048
X = np.random.randn(n, m) * 10 + np.random.randn(n, m) * 5 + 8
y = np.random.randint(0, 100, (n,)).astype(int)
# y_sampled = y
# y_binarized = np.zeros((y.size, 100))
# y_binarized[np.arange(y.size), y] = 1
# y = y_binarized
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

In [4]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.neighbors import KNeighborsClassifier

labels = np.arange(100)
def train_test(algo, pca_n_comp=2048, knn_n_neighbors=20):
    """Fit a classifier on the notebook-level train split and score the test split.

    Reads the module-level ``X_train``, ``X_test`` and ``y_train``. Features
    are standardized (scaler fitted on the train split only) and optionally
    reduced with PCA before the classifier is fitted.

    Args:
        algo: ``"knn"``, ``"randomforest"`` or ``"xgb"``. ``"xgb"`` is
            accepted but has no model wired in yet.
        pca_n_comp: Number of PCA components; PCA is skipped when this equals
            the number of input columns.
        knn_n_neighbors: ``n_neighbors`` for the k-NN classifier.

    Returns:
        Whatever the fitted estimator's ``predict_proba`` returns for the
        transformed test split, or ``None`` for ``"xgb"``.

    Raises:
        ValueError: If ``algo`` is not one of the names above.
    """
    # Reject typos before paying for scaling / PCA.
    if algo not in ("knn", "randomforest", "xgb"):
        raise ValueError("unknown algo")

    scaler = StandardScaler()
    X_train_transformed = scaler.fit_transform(X_train)
    X_test_transformed = scaler.transform(X_test)

    if pca_n_comp != X_train.shape[1]:
        pca = PCA(n_components=pca_n_comp)
        X_train_transformed = pca.fit_transform(X_train_transformed)
        X_test_transformed = pca.transform(X_test_transformed)

    if algo == "knn":
        model = KNeighborsClassifier(n_neighbors=knn_n_neighbors)
    elif algo == "randomforest":
        model = RandomForestClassifier(class_weight="balanced_subsample")
    else:
        # "xgb": placeholder branch, nothing to fit or predict yet.
        return None

    model.fit(X_train_transformed, y_train)
    # Hand the predictions back so callers can evaluate them.
    return model.predict_proba(X_test_transformed)

In [148]:
%timeit train_test(algo="knn")

4.2 s ± 609 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [None]:
%timeit train_test(algo="randomforest")

In [6]:
model = RandomForestClassifier(class_weight="balanced_subsample", n_jobs=12)

In [7]:
model.fit(X_train_transformed, y_train)

In [8]:
y_proba = model.predict_proba(X_test_transformed)

In [72]:
from xgboost.sklearn import XGBClassifier
from sklearn.utils.class_weight import compute_sample_weight

scaler = StandardScaler()
X_train_transformed = scaler.fit_transform(X_train)
X_test_transformed = scaler.transform(X_test)

pca = PCA(n_components=256)
X_train_transformed = pca.fit_transform(X_train_transformed)
X_test_transformed = pca.transform(X_test_transformed)

sample_weights = compute_sample_weight(class_weight='balanced', y=y_train)
xgb = XGBClassifier(early_stopping_rounds=10, tree_method="hist", max_depth=4, n_estimators=100, device="cuda")

In [73]:
xgb.fit(X_train_transformed, y_train, eval_set=[(X_test_transformed, y_test)], verbose=True, sample_weight=sample_weights)

[0]	validation_0-mlogloss:4.61183
[1]	validation_0-mlogloss:4.61640
[2]	validation_0-mlogloss:4.62094
[3]	validation_0-mlogloss:4.62685
[4]	validation_0-mlogloss:4.63390
[5]	validation_0-mlogloss:4.63917
[6]	validation_0-mlogloss:4.64556
[7]	validation_0-mlogloss:4.65126
[8]	validation_0-mlogloss:4.65507
[9]	validation_0-mlogloss:4.66272


In [75]:
y_proba = xgb.predict_proba(X_test_transformed)
y_proba.shape

(4711, 100)

In [None]:
from xgboost import DMatrix

DMatrix()

In [69]:
import timm

m = timm.create_model("resnet34", pretrained=False)
dir(m)

['T_destination',
 '__annotations__',
 '__call__',
 '__class__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattr__',
 '__getattribute__',
 '__getstate__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__setstate__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 '_apply',
 '_backward_hooks',
 '_backward_pre_hooks',
 '_buffers',
 '_call_impl',
 '_compiled_call_impl',
 '_forward_hooks',
 '_forward_hooks_always_called',
 '_forward_hooks_with_kwargs',
 '_forward_pre_hooks',
 '_forward_pre_hooks_with_kwargs',
 '_get_backward_hooks',
 '_get_backward_pre_hooks',
 '_get_name',
 '_is_full_backward_hook',
 '_load_from_state_dict',
 '_load_state_dict_post_hooks',
 '_load_state_dict_pre_hooks',
 '_maybe_warn_non_full_backward_hook',
 '_modules',
 '_named_members',
 '_non_persistent_buffers_se

In [70]:
m.device

AttributeError: 'ResNet' object has no attribute 'device'

In [19]:
xgb.predict_proba(X_test_transformed)

array([[0.01011109, 0.00971837, 0.00985839, ..., 0.00944635, 0.01012211,
        0.00957291],
       [0.00997305, 0.00958568, 0.00972379, ..., 0.00931739, 0.00964165,
        0.0103318 ],
       [0.01025263, 0.0098544 , 0.00999638, ..., 0.00957858, 0.00991193,
        0.00970691],
       ...,
       [0.01017627, 0.00978102, 0.00992194, ..., 0.00950725, 0.00903583,
        0.00963462],
       [0.01016838, 0.00977342, 0.00991424, ..., 0.00949987, 0.00983048,
        0.00962714],
       [0.01022303, 0.00982595, 0.00996752, ..., 0.01263243, 0.00988332,
        0.00967888]], dtype=float32)

In [145]:
knn = KNeighborsClassifier(n_neighbors=5)
knn.fit(X_train, y_train)
y_proba = knn.predict(X_test)
y_proba

array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]])

In [146]:
np.unique(y_proba)

array([0., 1.])

In [125]:
np.concatenate(y_proba, axis=1).shape

(4711, 199)

In [126]:
[a.shape for a in y_proba]

[(4711, 1),
 (4711, 2),
 (4711, 2),
 (4711, 2),
 (4711, 2),
 (4711, 2),
 (4711, 2),
 (4711, 2),
 (4711, 2),
 (4711, 2),
 (4711, 2),
 (4711, 2),
 (4711, 2),
 (4711, 2),
 (4711, 2),
 (4711, 2),
 (4711, 2),
 (4711, 2),
 (4711, 2),
 (4711, 2),
 (4711, 2),
 (4711, 2),
 (4711, 2),
 (4711, 2),
 (4711, 2),
 (4711, 2),
 (4711, 2),
 (4711, 2),
 (4711, 2),
 (4711, 2),
 (4711, 2),
 (4711, 2),
 (4711, 2),
 (4711, 2),
 (4711, 2),
 (4711, 2),
 (4711, 2),
 (4711, 2),
 (4711, 2),
 (4711, 2),
 (4711, 2),
 (4711, 2),
 (4711, 2),
 (4711, 2),
 (4711, 2),
 (4711, 2),
 (4711, 2),
 (4711, 2),
 (4711, 2),
 (4711, 2),
 (4711, 2),
 (4711, 2),
 (4711, 2),
 (4711, 2),
 (4711, 2),
 (4711, 2),
 (4711, 2),
 (4711, 2),
 (4711, 2),
 (4711, 2),
 (4711, 2),
 (4711, 2),
 (4711, 2),
 (4711, 2),
 (4711, 2),
 (4711, 2),
 (4711, 2),
 (4711, 2),
 (4711, 2),
 (4711, 2),
 (4711, 2),
 (4711, 2),
 (4711, 2),
 (4711, 2),
 (4711, 2),
 (4711, 2),
 (4711, 2),
 (4711, 2),
 (4711, 2),
 (4711, 2),
 (4711, 2),
 (4711, 2),
 (4711, 2),
 (47

In [127]:
y_proba[0]

array([[1.],
       [1.],
       [1.],
       ...,
       [1.],
       [1.],
       [1.]])

In [144]:
y_proba[16]

array([[1. , 0. ],
       [0.8, 0.2],
       [1. , 0. ],
       ...,
       [1. , 0. ],
       [1. , 0. ],
       [1. , 0. ]])

In [113]:
%timeit train_test(algo="knn")

AttributeError: 'list' object has no attribute 'shape'

In [70]:
np.unique(y_train)

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33,
       34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50,
       51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67,
       68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84,
       85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99])

In [69]:
np.unique(y_test)

array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17,
       18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34,
       35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51,
       52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68,
       69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85,
       86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99])

In [59]:
np.array({1, 2, 3})

array({1, 2, 3}, dtype=object)

In [42]:
labels

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33,
       34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50,
       51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67,
       68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84,
       85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99])

In [43]:
X_train.shape[1]

2048

  _warn_prf(average, "true nor predicted", "F-score is", len(true_sum))


ValueError: Number of given labels, 100, not equal to the number of columns in 'y_score', 101

In [63]:
np.insert(np.arange(25).reshape(5, 5), np.array([1, 4, 4]), 0, axis=1)

array([[ 0,  0,  1,  2,  3,  0,  0,  4],
       [ 5,  0,  6,  7,  8,  0,  0,  9],
       [10,  0, 11, 12, 13,  0,  0, 14],
       [15,  0, 16, 17, 18,  0,  0, 19],
       [20,  0, 21, 22, 23,  0,  0, 24]])

In [58]:
np.arange(4)

array([0, 1, 2, 3])

In [15]:
y_test.min()

0

In [16]:
y_hat.min()

0

In [17]:
y_hat.shape

(4711,)

In [19]:
y_test.shape

(4711,)

In [26]:
import pandas as pd

df = pd.read_csv("C:/Data/AAIT/task1/train_data/annotations.csv")

In [27]:
df

Unnamed: 0,sample,label
0,task1/train_data/images/labeled/0.jpeg,0
1,task1/train_data/images/labeled/1.jpeg,1
2,task1/train_data/images/labeled/2.jpeg,2
3,task1/train_data/images/labeled/3.jpeg,3
4,task1/train_data/images/labeled/4.jpeg,4
...,...,...
23550,task1/train_data/images/labeled/23550.jpeg,97
23551,task1/train_data/images/labeled/23551.jpeg,28
23552,task1/train_data/images/labeled/23552.jpeg,53
23553,task1/train_data/images/labeled/23553.jpeg,9


In [32]:
df.groupby("label").count().sort_values("sample")

Unnamed: 0_level_0,sample
label,Unnamed: 1_level_1
96,75
89,149
92,156
75,167
83,170
...,...
82,280
8,280
3,286
5,289


In [33]:
df.groupby("label").count().sort_values("sample", ascending=False)


Unnamed: 0_level_0,sample
label,Unnamed: 1_level_1
52,289
5,289
3,286
8,280
82,280
...,...
83,170
75,167
92,156
89,149


In [67]:
from sklearn.model_selection import StratifiedKFold
import numpy as np

# Experiment: does StratifiedKFold put the rare class into every fold?
# NOTE: this rebinds the notebook-level names `X` and `y`, and no seed is set
# in this cell, so the class counts differ between runs.
X = np.arange(400).reshape((80, 5))
# int(sqrt(k)) for k in 0..15 gives 0 only for k == 0, so class 0 is rare
# (1 draw in 16), while classes 1/2/3 cover 3/5/7 of the 16 values.
y = np.sqrt(np.random.randint(0, 16, size=(80,))).astype(int)

skf = StratifiedKFold(n_splits=3)
for i, (train_index, test_index) in enumerate(skf.split(X, y)):
    test_zeros = np.sum(y[test_index] == 0)
    train_zeros = np.sum(y[train_index] == 0)
    # Printed as "<class-0 rows in train>/<class-0 rows in test>".
    print(f"{train_zeros}/{test_zeros}")
    # Fails as soon as one side of a fold has no class-0 row, which is
    # unavoidable when class 0 has fewer rows than n_splits.
    assert test_zeros > 0 and train_zeros > 0

1/1
1/1
2/0




AssertionError: 

In [47]:
np.sort(y)

array([0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
       3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3])