In [31]:
!pip install opencv-python

Collecting opencv-python
  Downloading opencv_python-4.9.0.80-cp37-abi3-win_amd64.whl.metadata (20 kB)
Downloading opencv_python-4.9.0.80-cp37-abi3-win_amd64.whl (38.6 MB)
   ---------------------------------------- 0.0/38.6 MB ? eta -:--:--
   ---------------------------------------- 0.0/38.6 MB 1.3 MB/s eta 0:00:31
   ---------------------------------------- 0.2/38.6 MB 2.0 MB/s eta 0:00:20
   ---------------------------------------- 0.3/38.6 MB 2.6 MB/s eta 0:00:15
    --------------------------------------- 0.8/38.6 MB 4.3 MB/s eta 0:00:09
   - -------------------------------------- 1.8/38.6 MB 8.4 MB/s eta 0:00:05
   ---- ----------------------------------- 4.1/38.6 MB 15.5 MB/s eta 0:00:03
   ----- ---------------------------------- 5.2/38.6 MB 18.6 MB/s eta 0:00:02
   --------- ------------------------------ 9.4/38.6 MB 26.2 MB/s eta 0:00:02
   ------------ --------------------------- 11.7/38.6 MB 50.4 MB/s eta 0:00:01
   -------------- ------------------------- 13.9/38.6 MB 50.

In [32]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import random_split, DataLoader

import torchvision as tv
import numpy as np
import os
import cv2
import matplotlib.pyplot as plt
from tqdm import tqdm
import torch
import torch.nn as nn
import torch.nn.functional as F

import torchvision as tv
import numpy as np
import os
import matplotlib.pyplot as plt
from tqdm import tqdm
from torch.cuda.amp import autocast, GradScaler

In [64]:
class ResBlock(nn.Module):
    def __init__(self, num_channel):
        super().__init__()
        self.conv0 = nn.Conv2d(num_channel, num_channel, kernel_size=3, padding="same")
        self.norm0 = nn.BatchNorm2d(num_channel)
        self.activation = nn.LeakyReLU(0.2)
        self.conv1 = nn.Conv2d(num_channel, num_channel, kernel_size=3, padding="same")
        self.norm1 = nn.BatchNorm2d(num_channel)

    def forward(self, x):
        out = self.conv0(x)
        out = self.norm0(out)
        out = self.activation(out)
        out = self.conv1(out)
        out = self.norm1(out)
        out = self.activation(x + out)
        return out 

In [65]:
class BottleNeckBlock(nn.Module):
    def __init__(self, num_channel):
        super().__init__()
        self.conv0 = nn.Conv2d(num_channel, num_channel // 4, kernel_size=1)
        self.norm0 = nn.BatchNorm2d(num_channel // 4)
        self.activation = nn.LeakyReLU(0.2)
        self.conv1 = nn.Conv2d(num_channel // 4, num_channel // 4, kernel_size=3, padding="same")
        self.norm1 = nn.BatchNorm2d(num_channel // 4)
        self.conv2 = nn.Conv2d(num_channel // 4, num_channel, kernel_size=1)

    def forward(self, x):
        out = self.conv0(x)
        out = self.norm0(out)
        out = self.activation(out)
        out = self.conv1(out)
        out = self.norm1(out)
        out = self.activation(out)
        out = self.conv2(out)
        out = x + out
        return out 

In [66]:
class ResTruck(nn.Module):
    def __init__(self, num_channel, num_blocks, block_type = "classic"):
        super().__init__()
        
        truck = []
        for i in range(num_blocks):
            if block_type == "classic":
                truck += [ResBlock(num_channel)]
            else:
                truck += [BottleNeckBlock(num_channel)]

        self.block = nn.Sequential(*truck)

    def forward(self, x):
        return self.block(x)

In [67]:
class ResNet(nn.Module):
    def __init__(self, input_num_channel, num_channel, exit_num_channel):
        super().__init__()
        self.conv0 = nn.Conv2d(input_num_channel, num_channel, kernel_size=5, stride=2)
        
        self.activation = nn.LeakyReLU(0.2)
        self.maxpool = nn.MaxPool2d(2, 2)
        
        self.block1 = ResTruck(num_channel, 2)
        self.conv1 = nn.Conv2d(num_channel, 2*num_channel, kernel_size=3, padding=1, stride=2)
        self.block2 = ResTruck(2*num_channel, 2, "nonclassic")
        self.conv2 = nn.Conv2d(2*num_channel, 4*num_channel, kernel_size=3, padding=1, stride=2)
        self.block3 = ResTruck(4*num_channel, 3, "nonclassic")
        self.conv3 = nn.Conv2d(4*num_channel, 4*num_channel, kernel_size=3, padding=1, stride=2)
        self.block4 = ResTruck(4*num_channel, 2)

        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.flatten = nn.Flatten()
        self.linear = nn.Linear(4*num_channel, exit_num_channel)
        
    def forward(self, x):
        out = self.conv0(x)
        out = self.activation(out)
        out = self.maxpool(out)

        out = self.block1(out)
        out = self.conv1(out)
        out = self.block2(out)
        out = self.conv2(out)
        out = self.block3(out)
        out = self.conv3(out)
        out = self.block4(out)

        out = self.avgpool(out)
        out = self.flatten(out)
        out = self.linear(out)
        return out

In [36]:
class Dataset2class(torch.utils.data.Dataset):
    def __init__(self, path_dir1:str):
        super().__init__()

        self.path_dir1 = path_dir1
        self.dir1_list = sorted(os.listdir(path_dir1))

    def __len__(self):
        return len(self.dir1_list)

    def __getitem__(self, key):
        if self.dir1_list[key].split(".")[0] == "cat":
            id_class = 1
            img_path = os.path.join(self.path_dir1, self.dir1_list[key])
        else: 
            id_class = 0
            img_path = os.path.join(self.path_dir1, self.dir1_list[key])
        img = cv2.imread(img_path, cv2.IMREAD_COLOR)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img = img.astype(np.float32)
        img = img / 255.0
        img = cv2.resize(img, (224, 224), interpolation=cv2.INTER_AREA)
        img = img.transpose((2, 0, 1))
        t_img = torch.from_numpy(img)
        t_class_id = torch.tensor([id_class])

        return {"img" : t_img,
                "labels" : t_class_id}

In [37]:
def accuracy(labels, predictions, threshold=0.5):
    """
    Computes accuracy between binary labels and predictions.

    Parameters:
    - labels (torch.Tensor): Binary ground truth labels (0 or 1).
    - predictions (torch.Tensor): Predicted values (e.g., output of a sigmoid activation).
    - threshold (float): Threshold for converting predictions to binary values.

    Returns:
    - float: Accuracy value.
    """
    binary_predictions = (torch.sigmoid(predictions) > threshold).to(torch.float32)
    correct_predictions = (binary_predictions == labels).to(torch.float32)
    accuracy_value = correct_predictions.mean().item()
    return accuracy_value

In [68]:
train_path = "C://img//dogs_vs_cats//train"
test_path = "C://img//Dogs and Cats//dataset//test_set"
train_ds_cats_dogs = Dataset2class(train_path)
test_ds_cats_dogs = Dataset2class(test_path)

In [69]:
batch_size = 64
train_size = int(0.85 * len(train_ds_cats_dogs))
val_size = len(train_ds_cats_dogs) - train_size

train_data, val_data = random_split(train_ds_cats_dogs, [train_size, val_size])

train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True, num_workers=0, drop_last=True)
val_loader = DataLoader(val_data, batch_size=batch_size, shuffle=False,  num_workers=0, drop_last=True)
test_loader = torch.utils.data.DataLoader(test_ds_cats_dogs, batch_size=batch_size, num_workers=0, shuffle=True, drop_last=False)

In [70]:
def count_parameters(model):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)

In [71]:
torch.cuda.is_available()

True

In [77]:
device = "cuda" if torch.cuda.is_available() else "cpu"
model = ResNet(3, 64, 1)
model = model.to(device)
print(model)
print(count_parameters(model))
print(device)

ResNet(
  (conv0): Conv2d(3, 64, kernel_size=(5, 5), stride=(2, 2))
  (activation): LeakyReLU(negative_slope=0.2)
  (maxpool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (block1): ResTruck(
    (block): Sequential(
      (0): ResBlock(
        (conv0): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=same)
        (norm0): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (activation): LeakyReLU(negative_slope=0.2)
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=same)
        (norm1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
      (1): ResBlock(
        (conv0): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=same)
        (norm0): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (activation): LeakyReLU(negative_slope=0.2)
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 

In [78]:
loss_fn = nn.BCEWithLogitsLoss().to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.001, betas=(0.9, 0.999))

In [79]:
use_amp = True
scaler = torch.cuda.amp.GradScaler()
torch.backends.cudnn.benchmark = True #проверяет сначала какой алгоритм вычисления тензоров
#эффективный и потом по нему производит все вычисления. Необходим фиксированный размер фотографии
torch.backends.cudnn.deterministic = False #работает как seed=42

In [80]:
epochs = 10
for epoch in range(epochs):
    loss_val = 0
    loss_train = 0
    accuracy_val = 0
    accuracy_train = 0
    
    model.train()
    for sample in (pbar := tqdm(train_loader)):
        train_img = sample["img"].to(device)
        train_labels = sample["labels"].float().to(device) 

        optimizer.zero_grad()
        
        with autocast(use_amp):
            pred = model(train_img)
            loss = loss_fn(pred, train_labels)
        
        if device == "cuda" and use_amp:
            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()
        else:
            loss.backward()
            optimizer.step()
        
        with torch.no_grad():
            loss_item_train = loss.item()
            loss_train += loss_item_train

            accuracy_current_train = accuracy(train_labels, pred)
            accuracy_train += accuracy_current_train
        pbar.set_description(f"loss_train: {loss_item_train:.3f}, accuracy_train: {accuracy_current_train:.3f}")
        
    model.eval()
    with torch.no_grad():
        for sample_val in tqdm(val_loader):
            img_val = sample_val["img"].to(device)
            label_val = sample_val["labels"].float().to(device) 
            
            with autocast(use_amp):
                pred_val = model(img_val)
                loss_fn_val = loss_fn(pred_val, label_val)

                loss_item_val = loss_fn_val.item()
                loss_val += loss_item_val

                accuracy_current_val = accuracy(label_val, pred_val)
                accuracy_val += accuracy_current_val

    print(f"Epoch {epoch + 1} - Train Loss: {loss_train / len(train_loader):.3f}, Train Accuracy: {accuracy_train / len(train_loader):.3f}")
    print(f"Epoch {epoch + 1} - Validation Loss: {loss_val / len(val_loader):.3f}, Validation Accuracy: {accuracy_val / len(val_loader):.3f}")

loss_train: 0.641, accuracy_train: 0.625: 100%|██████████| 332/332 [01:50<00:00,  3.01it/s]
100%|██████████| 58/58 [00:18<00:00,  3.09it/s]


Epoch 1 - Train Loss: 0.660, Train Accuracy: 0.608
Epoch 1 - Validation Loss: 0.714, Validation Accuracy: 0.551


loss_train: 0.640, accuracy_train: 0.688: 100%|██████████| 332/332 [01:51<00:00,  2.97it/s]
100%|██████████| 58/58 [00:16<00:00,  3.53it/s]


Epoch 2 - Train Loss: 0.571, Train Accuracy: 0.702
Epoch 2 - Validation Loss: 0.593, Validation Accuracy: 0.662


loss_train: 0.451, accuracy_train: 0.797: 100%|██████████| 332/332 [01:40<00:00,  3.30it/s]
100%|██████████| 58/58 [00:16<00:00,  3.54it/s]


Epoch 3 - Train Loss: 0.504, Train Accuracy: 0.749
Epoch 3 - Validation Loss: 0.628, Validation Accuracy: 0.658


loss_train: 0.520, accuracy_train: 0.734: 100%|██████████| 332/332 [01:40<00:00,  3.31it/s]
100%|██████████| 58/58 [00:16<00:00,  3.51it/s]


Epoch 4 - Train Loss: 0.451, Train Accuracy: 0.789
Epoch 4 - Validation Loss: 0.470, Validation Accuracy: 0.796


loss_train: 0.401, accuracy_train: 0.812: 100%|██████████| 332/332 [01:40<00:00,  3.31it/s]
100%|██████████| 58/58 [00:16<00:00,  3.60it/s]


Epoch 5 - Train Loss: 0.388, Train Accuracy: 0.827
Epoch 5 - Validation Loss: 0.580, Validation Accuracy: 0.736


loss_train: 0.386, accuracy_train: 0.859: 100%|██████████| 332/332 [01:39<00:00,  3.34it/s]
100%|██████████| 58/58 [00:16<00:00,  3.59it/s]


Epoch 6 - Train Loss: 0.317, Train Accuracy: 0.860
Epoch 6 - Validation Loss: 0.478, Validation Accuracy: 0.780


loss_train: 0.349, accuracy_train: 0.859: 100%|██████████| 332/332 [01:39<00:00,  3.33it/s]
100%|██████████| 58/58 [00:16<00:00,  3.54it/s]


Epoch 7 - Train Loss: 0.270, Train Accuracy: 0.886
Epoch 7 - Validation Loss: 0.324, Validation Accuracy: 0.856


loss_train: 0.093, accuracy_train: 1.000: 100%|██████████| 332/332 [01:39<00:00,  3.33it/s]
100%|██████████| 58/58 [00:16<00:00,  3.58it/s]


Epoch 8 - Train Loss: 0.236, Train Accuracy: 0.903
Epoch 8 - Validation Loss: 0.358, Validation Accuracy: 0.835


loss_train: 0.217, accuracy_train: 0.906: 100%|██████████| 332/332 [01:41<00:00,  3.28it/s]
100%|██████████| 58/58 [00:17<00:00,  3.39it/s]


Epoch 9 - Train Loss: 0.198, Train Accuracy: 0.917
Epoch 9 - Validation Loss: 0.241, Validation Accuracy: 0.898


loss_train: 0.184, accuracy_train: 0.938: 100%|██████████| 332/332 [01:41<00:00,  3.29it/s]
100%|██████████| 58/58 [00:16<00:00,  3.59it/s]

Epoch 10 - Train Loss: 0.179, Train Accuracy: 0.927
Epoch 10 - Validation Loss: 0.317, Validation Accuracy: 0.860





In [81]:
torch.save({'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'loss': loss_fn}, "current_model")

In [83]:
current_model = ResNet(3, 64, 1)
current_model = current_model.to(device)
optimizer = torch.optim.Adam(current_model.parameters())

checkpoint = torch.load("current_model")
current_model.load_state_dict(checkpoint['model_state_dict'])
optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
loss = checkpoint['loss']

In [61]:
loss_val = 0
accuracy_val = 0
current_model.eval()
for sample in test_loader:
    img, labels = sample["img"].to(device), sample["labels"].float().to(device)
    with torch.no_grad():
        pred = current_model(img)
        loss = loss_fn(pred, labels)

    loss_item = loss.item()
    loss_val += loss_item
    accuracy_current = accuracy(labels, pred)
    accuracy_val += accuracy_current
print(f"test_loss: {loss_val/len(test_loader)}")
print(f"test_accuracy: {accuracy_val/len(test_loader)}")

test_loss: 0.1779381421705087
test_accuracy: 0.9255952380952381


In [84]:
loss_val = 0
accuracy_val = 0
current_model.eval()
for sample in test_loader:
    img, labels = sample["img"].to(device), sample["labels"].float().to(device)
    with torch.no_grad():
        pred = current_model(img)
        loss = loss_fn(pred, labels)

    loss_item = loss.item()
    loss_val += loss_item
    accuracy_current = accuracy(labels, pred)
    accuracy_val += accuracy_current
print(f"test_loss: {loss_val/len(test_loader)}")
print(f"test_accuracy: {accuracy_val/len(test_loader)}")

test_loss: 0.2917527062818408
test_accuracy: 0.884765625


In [85]:
def check_img(path):
    img = cv2.imread(path, cv2.IMREAD_COLOR)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = img.astype(np.float32)
    img = img / 255.0
    img = cv2.resize(img, (256, 256), interpolation=cv2.INTER_AREA)
    img = img.transpose((2, 0, 1))
    t_img = torch.from_numpy(img)
    return t_img

In [87]:
current_model.eval()
current_model.to("cpu")
with torch.no_grad():
    print(torch.sigmoid(current_model(check_img("C://img//Dogs and Cats//dataset//single_prediction//banana_dog.jpg").unsqueeze(0))))

tensor([[0.0148]])
