In [11]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import random_split, DataLoader

import torchvision as tv
import numpy as np
import os
import cv2
import matplotlib.pyplot as plt
from tqdm import tqdm
import torch
import torch.nn as nn
import torch.nn.functional as F
import yaml

import torchvision as tv
import numpy as np
import os
import cv2
import matplotlib.pyplot as plt
from tqdm import tqdm
from torch.cuda.amp import autocast, GradScaler

In [12]:
yaml_path = "config.yml"
with open(yaml_path, "r") as yaml_file:
    options = yaml.safe_load(yaml_file)

In [13]:
options

{'batch_size': 32,
 'train_path': 'C://img//dogs_vs_cats//train',
 'test_path': 'C://img//Dogs and Cats//dataset//test_set',
 'network': {'input_size': 3,
  'output_size': 1,
  'loss': 'nn.BCEWithLogitsLoss()',
  'benchmark': True,
  'use_amp': False}}

In [14]:
class Dataset2class(torch.utils.data.Dataset):
    def __init__(self, path_dir1:str):
        super().__init__()

        self.path_dir1 = path_dir1
        self.dir1_list = sorted(os.listdir(path_dir1))

    def __len__(self):
        return len(self.dir1_list)

    def __getitem__(self, key):
        if self.dir1_list[key].split(".")[0] == "cat":
            id_class = 1
            img_path = os.path.join(self.path_dir1, self.dir1_list[key])
        else: 
            id_class = 0
            img_path = os.path.join(self.path_dir1, self.dir1_list[key])
        img = cv2.imread(img_path, cv2.IMREAD_COLOR)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img = img.astype(np.float32)
        img = img / 255.0
        img = cv2.resize(img, (224, 224), interpolation=cv2.INTER_AREA)
        img = img.transpose((2, 0, 1))
        t_img = torch.from_numpy(img)
        t_class_id = torch.tensor([id_class])

        return {"img" : t_img,
                "labels" : t_class_id}

In [15]:
class ConvNet(nn.Module):
    def __init__(self, input_size, output_size):
        super().__init__()
        self.activation = nn.LeakyReLU(0.1)
        self.maxpool = nn.MaxPool2d(2, 2)
        self.conv0 = nn.Conv2d(input_size, 64, 3, stride=1, padding=0)
        self.conv1 = nn.Conv2d(64, 64, 3, stride=1, padding=0)
        self.conv2 = nn.Conv2d(64, 64, 3, stride=1, padding=0)
        self.conv3 = nn.Conv2d(64, 128, 3, stride=1, padding=0)
        self.flatten = nn.Flatten()
        self.adaptivepool = nn.AdaptiveAvgPool2d((1, 1))
        self.linear1 = nn.Linear(128, 10)
        self.dropout = nn.Dropout2d(0.5)
        self.linear2 = nn.Linear(10, output_size)

    def forward(self, x):
        out = self.conv0(x)
        out = self.activation(out)
        #print(out.shape)
        out = self.conv1(out)
        out = self.activation(out)
        #print(out.shape)
        out = self.conv2(out)
        out = self.activation(out)
        out = self.maxpool(out)
        #print(out.shape)
        out = self.conv3(out)
        out = self.activation(out)

        #print(out.shape)
        out = self.adaptivepool(out)
        out = self.flatten(out)
        #print(out.shape)
        out = self.linear1(out)
        out = self.activation(out)
        out = self.dropout(out)
        out = self.linear2(out)

        return out


In [16]:
def accuracy(labels, predictions, threshold=0.5):
    """
    Computes accuracy between binary labels and predictions.

    Parameters:
    - labels (torch.Tensor): Binary ground truth labels (0 or 1).
    - predictions (torch.Tensor): Predicted values (e.g., output of a sigmoid activation).
    - threshold (float): Threshold for converting predictions to binary values.

    Returns:
    - float: Accuracy value.
    """
    binary_predictions = (torch.sigmoid(predictions) > threshold).to(torch.float32)
    correct_predictions = (binary_predictions == labels).to(torch.float32)
    accuracy_value = correct_predictions.mean().item()
    return accuracy_value

In [17]:
train_path = options["train_path"]
test_path = options["test_path"]
train_ds_cats_dogs = Dataset2class(train_path)
test_ds_cats_dogs = Dataset2class(test_path)

In [18]:
batch_size = options["batch_size"]
train_size = int(0.85 * len(train_ds_cats_dogs))
val_size = len(train_ds_cats_dogs) - train_size

train_data, val_data = random_split(train_ds_cats_dogs, [train_size, val_size])

train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True, num_workers=0, drop_last=True)
val_loader = DataLoader(val_data, batch_size=batch_size, shuffle=False,  num_workers=0, drop_last=True)
test_loader = torch.utils.data.DataLoader(test_ds_cats_dogs, batch_size=batch_size, num_workers=0, shuffle=True, drop_last=False)

In [19]:
device = "cuda" if torch.cuda.is_available() else "cpu"
input_size = options["network"]["input_size"]
output_size = options["network"]["output_size"]
model = ConvNet(input_size, output_size)
model = model.to(device)
print(device)
print(model)

cuda
ConvNet(
  (activation): LeakyReLU(negative_slope=0.1)
  (maxpool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1))
  (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1))
  (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1))
  (conv3): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1))
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (adaptivepool): AdaptiveAvgPool2d(output_size=(1, 1))
  (linear1): Linear(in_features=128, out_features=10, bias=True)
  (dropout): Dropout2d(p=0.5, inplace=False)
  (linear2): Linear(in_features=10, out_features=1, bias=True)
)


In [20]:
def count_parameters(model):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)

In [21]:
print(count_parameters(model))

150805


In [23]:
loss_fn = nn.BCEWithLogitsLoss().to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.001, betas=(0.9, 0.999))

In [24]:
use_amp = options["network"]["use_amp"]
scaler = torch.cuda.amp.GradScaler()
torch.backends.cudnn.benchmark = options["network"]["benchmark"] #проверяет сначала какой алгоритм вычисления тензоров
#эффективный и потом по нему производит все вычисления. Необходим фиксированный размер фотографии
torch.backends.cudnn.deterministic = False #работает как seed=42

In [25]:
epochs = 2
for epoch in range(epochs):
    loss_val = 0
    loss_train = 0
    accuracy_val = 0
    accuracy_train = 0
    
    model.train()
    for sample in (pbar := tqdm(train_loader)):
        train_img = sample["img"].to(device)
        train_labels = sample["labels"].float().to(device)  # One-hot encode the labels

        optimizer.zero_grad()
        
        with autocast(use_amp):
            pred = model(train_img)
            loss = loss_fn(pred, train_labels)
        
        if device == "cuda" and use_amp:
            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()
        else:
            loss.backward()
            optimizer.step()
        
        with torch.no_grad():
            loss_item_train = loss.item()
            loss_train += loss_item_train

            accuracy_current_train = accuracy(train_labels, pred)
            accuracy_train += accuracy_current_train
        pbar.set_description(f"loss_train: {loss_item_train:.3f}, accuracy_train: {accuracy_current_train:.3f}")
        
    model.eval()
    with torch.no_grad():
        for sample_val in tqdm(val_loader):
            img_val = sample_val["img"].to(device)
            label_val = sample_val["labels"].float().to(device)  # One-hot encode the labels
            
            with autocast(use_amp):
                pred_val = model(img_val)
                loss_fn_val = loss_fn(pred_val, label_val)

                loss_item_val = loss_fn_val.item()
                loss_val += loss_item_val

                accuracy_current_val = accuracy(label_val, pred_val)
                accuracy_val += accuracy_current_val

    print(f"Epoch {epoch + 1} - Train Loss: {loss_train / len(train_loader):.3f}, Train Accuracy: {accuracy_train / len(train_loader):.3f}")
    print(f"Epoch {epoch + 1} - Validation Loss: {loss_val / len(val_loader):.3f}, Validation Accuracy: {accuracy_val / len(val_loader):.3f}")

loss_train: 0.694, accuracy_train: 0.531:   3%|▎         | 20/664 [00:11<06:14,  1.72it/s]


KeyboardInterrupt: 

In [58]:
loss_val = 0
accuracy_val = 0
model.eval()
with torch.no_grad():
    for sample in test_loader:
        img, labels = sample["img"].to(device), sample["labels"].float().to(device)

        preds = model(img)
        loss = loss_fn(preds, labels)

        loss_item = loss.item()
        loss_val += loss_item
        accuracy_current = accuracy(labels, preds)
        accuracy_val += accuracy_current
print(loss_val/len(test_loader))
print(accuracy_val/len(test_loader))

0.5197481782663436
0.7405753968253969


In [59]:
torch.save({'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'loss': loss_fn,
            "epoch" : epochs},
            "best_model")

In [60]:
best_model = ConvNet()
best_model = best_model.to("cpu")
optimizer = torch.optim.Adam(best_model.parameters())

checkpoint = torch.load("best_model")
best_model.load_state_dict(checkpoint['model_state_dict'])
optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
loss = checkpoint['loss']

In [61]:
def check_img(path):
    img = cv2.imread(path, cv2.IMREAD_COLOR)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = img.astype(np.float32)
    img = img / 255.0
    img = cv2.resize(img, (256, 256), interpolation=cv2.INTER_AREA)
    img = img.transpose((2, 0, 1))
    t_img = torch.from_numpy(img)
    return t_img


In [63]:
best_model.eval()
with torch.no_grad():
    print(torch.sigmoid(best_model(check_img("C://img//Dogs and Cats//dataset//single_prediction//banana_dog.jpg").unsqueeze(0))))


tensor([[0.6550]])
