In [6]:
import os
from PIL import Image
import torchvision.transforms as transforms
import torch
import numpy as np

from src.model.fusion import AttentionFusionModule
from src.datamodule.datamodule import ImageForgeryDatamMdule
from timm.models.swin_transformer import SwinTransformer
from src.model.cnn_gru import HybridCNNGRU
from timm.models.layers import SelectAdaptivePool2d, ClassifierHead
import timm
from torch import nn
import torch_dct as dct
from src.lit_model import LitModel

In [7]:
# Dummy batch used to smoke-test the models below: three random 3-channel
# 224x224 inputs with values drawn uniformly from [0, 1).
image = torch.rand((3, 3, 224, 224))

In [4]:
# Smoke test: build a LitModel with the hyperparameters below and run a single
# forward pass on the random batch `image`.
#
# The module instance is called directly instead of `.forward(...)`:
# `nn.Module.__call__` runs any registered forward/pre-forward hooks before
# delegating to `forward`, whereas calling `.forward` silently skips them.
LitModel(
    # Architecture
    num_classes=2,
    d_model=512,
    pretrain=True,
    requires_grad=True,
    drop_rate=0.0,
    proj_drop_rate=0.0,
    attn_drop_rate=0.0,
    drop_path_rate=0.0,
    hidden_size=512,
    image_size=224,
    patch_size=7,
    # Optimisation
    learning_rate=1e-4,
    weight_decay=1e-6,
    patience=5,
)(image)

torch.Size([3, 7, 7, 768]) torch.Size([3, 7, 7, 512])


tensor([[-0.0133,  0.0979],
        [-0.0203,  0.1181],
        [ 0.0270,  0.1115]], grad_fn=<AddmmBackward0>)

In [53]:
class CNN(nn.Module):
    """Five-stage convolutional feature extractor.

    Every stage is a 3x3 convolution (stride 1, padding 1) followed by batch
    normalisation and an activation. The first four stages use GELU and end
    with a 2x2 average pool; the last stage uses SiLU and has no pooling.
    Channel widths grow 32 -> 64 -> 128 -> 256 -> 512.

    The first three pools use stride 2 with padding 1, the fourth uses
    stride 1 without padding. For a 224x224 input this produces a 28x28
    feature map with 512 channels.

    Args:
        input_channels: Number of channels of the input tensor.
    """

    def __init__(self, input_channels):
        super().__init__()

        # One entry per stage: (output channels, activation class, pool kwargs or None).
        stage_specs = (
            (32, nn.GELU, {"kernel_size": 2, "stride": 2, "padding": 1}),
            (64, nn.GELU, {"kernel_size": 2, "stride": 2, "padding": 1}),
            (128, nn.GELU, {"kernel_size": 2, "stride": 2, "padding": 1}),
            (256, nn.GELU, {"kernel_size": 2, "stride": 1, "padding": 0}),
            (512, nn.SiLU, None),
        )

        layers = []
        in_channels = input_channels
        for out_channels, activation, pool_kwargs in stage_specs:
            # Layers are created in conv -> norm -> activation -> pool order so
            # the positions inside the Sequential stay the same.
            layers.append(
                nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=1, padding=1)
            )
            layers.append(nn.BatchNorm2d(out_channels))
            layers.append(activation())
            if pool_kwargs is not None:
                layers.append(nn.AvgPool2d(**pool_kwargs))
            in_channels = out_channels

        self.module = nn.Sequential(*layers)

    def forward(self, x):
        """Run `x` through the stacked stages and return the feature map."""
        return self.module(x)


# Shape check on the random batch `image`; the recorded result for the
# 224x224 input is a 28x28 map with 512 channels.
CNN(input_channels=3)(image).shape

torch.Size([3, 512, 28, 28])

In [52]:
# Side of the 28x28 CNN feature map divided by 7 - presumably the remaining
# downsampling factor needed to reach a 7x7 grid; TODO confirm intent.
28 / 7

4.0

In [4]:
import torch
import torch.nn as nn

# Toy example: global average pooling over a channels-last feature map,
# followed by a linear classifier and a cross-entropy loss.
B, W, H, D = 2, 8, 8, 768  # B: batch size, W: width, H: height, D: feature dimension
C = 10  # Number of classes
output = torch.randn(B, W, H, D)  # Output from a CNN model or similar

# Apply global average pooling over the spatial dimensions (W, H)
pooled_output = output.mean(dim=(1, 2))  # Shape: (B, D)
print("Pooled output:", pooled_output.shape)
# Project to logits (one per class)
linear = nn.Linear(D, C)
logits = linear(pooled_output)  # Shape: (B, C)

# Compute the cross-entropy loss
target = torch.randint(0, C, (B,))  # Ground truth labels
loss_fn = nn.CrossEntropyLoss()
loss = loss_fn(logits, target)

print("Loss:", loss.item())


Pooled output: torch.Size([2, 768])
Loss: 2.2533135414123535


In [6]:
# `torch.nn` has no `Poo` attribute; the averaged values in the recorded
# output correspond to `nn.AvgPool2d`.
# NOTE(review): AvgPool2d interprets a 4-D input as (N, C, H, W), so on the
# channels-last `output` of shape (B, W, H, D) the 8x8 window slides over the
# last two axes (H, D), not over (W, H). The result therefore differs from
# `output.mean(dim=(1, 2))`; permute to (B, D, W, H) first if spatial pooling
# is what is wanted - confirm intent.
nn.AvgPool2d(kernel_size=(8, 8))(output)

tensor([[[[ 6.1461e-02,  1.7132e-01, -4.3081e-02,  ...,  1.8428e-01,
            7.9201e-02,  7.7000e-03]],

         [[-7.3590e-02,  3.6203e-02,  3.1155e-02,  ..., -1.8917e-01,
           -6.0701e-02, -1.1432e-02]],

         [[-1.5933e-01, -1.1345e-01,  1.8945e-04,  ...,  7.2752e-02,
            1.4967e-01,  1.9428e-02]],

         ...,

         [[-1.1533e-01, -9.6979e-02, -2.8492e-02,  ...,  9.7934e-03,
           -7.2704e-02,  4.2504e-02]],

         [[-9.3216e-02, -1.5313e-01, -7.4550e-02,  ...,  5.9874e-02,
           -2.6313e-02,  3.4397e-01]],

         [[ 1.0297e-01,  5.2828e-02,  2.3151e-01,  ...,  2.2125e-02,
           -1.2867e-01,  3.0936e-02]]],


        [[[ 5.8546e-02, -1.3710e-02, -1.9255e-01,  ...,  4.8425e-02,
           -8.8405e-02,  5.4689e-02]],

         [[-1.3192e-01,  8.8368e-02, -1.4156e-01,  ..., -1.5053e-01,
           -3.6285e-02,  1.3985e-01]],

         [[ 3.4798e-02, -2.0436e-02,  2.9200e-02,  ..., -8.3135e-02,
            6.5053e-03, -9.5515e-02]],

  

In [7]:
# Display the (B, D) features obtained from `output.mean(dim=(1, 2))`.
pooled_output

tensor([[-0.1690, -0.0028, -0.0676,  ..., -0.0259,  0.0784,  0.0794],
        [-0.1619,  0.1977, -0.1895,  ..., -0.0112, -0.2003,  0.0325]])

In [8]:
# Two random (1, 14, 14, N) tensors that differ only in the size of their last
# dimension (224 and 512); used as inputs to `att` in the following cells.
x1, x2 = (torch.rand(1, 14, 14, last_dim) for last_dim in (224, 512))

In [9]:
# NOTE(review): `att` is not defined in any visible cell, so this relies on
# leftover kernel state - presumably an AttentionFusionModule instance; confirm
# and define it explicitly before this cell.
att(x1, x2).shape

torch.Size([1, 14, 14, 128])

In [10]:
# Build the head first, then apply it to the fused features (same order as a
# single chained expression).
# NOTE(review): in_features=14 matches dim 1 of the (1, 14, 14, 128) fused
# tensor; confirm that this is the intended feature axis.
classifier_head = ClassifierHead(in_features=14, num_classes=2)
classifier_head(att(x1, x2))

tensor([[-0.1777,  0.0030]], grad_fn=<AddmmBackward0>)

In [11]:
# Softmax along dim=1 (size 2): for every position of the last axis the two
# values along dim 1 are normalised to sum to 1.
tem = torch.rand(1, 2, 10)
tem.softmax(dim=1)

tensor([[[0.5063, 0.4341, 0.5391, 0.3359, 0.4139, 0.5473, 0.4205, 0.4878,
          0.5356, 0.4875],
         [0.4937, 0.5659, 0.4609, 0.6641, 0.5861, 0.4527, 0.5795, 0.5122,
          0.4644, 0.5125]]])

In [53]:
class CNN(nn.Module):
    """Five-stage convolutional feature extractor with light downsampling.

    Note: this redefines the `CNN` class declared earlier in the notebook;
    after this cell runs, `CNN` refers to this variant.

    Every stage is a 3x3 convolution (stride 1, padding 1), batch
    normalisation and GELU. Channel widths grow 32 -> 64 -> 128 -> 256 -> 512.
    Only the first three stages end with a 2x2 average pool:
    stride 1 / padding 1, then stride 2 / padding 1, then stride 1 without
    padding. For a 224x224 input this produces a 112x112 feature map with
    512 channels.

    Args:
        input_channels: Number of channels of the input tensor.
    """

    def __init__(self, input_channels):
        super().__init__()

        widths = [input_channels, 32, 64, 128, 256, 512]
        # Pooling configuration per stage; None means the stage has no pool.
        pooling = [
            {"kernel_size": 2, "stride": 1, "padding": 1},
            {"kernel_size": 2, "stride": 2, "padding": 1},
            {"kernel_size": 2, "stride": 1},
            None,
            None,
        ]

        layers = []
        for (in_channels, out_channels), pool_cfg in zip(zip(widths, widths[1:]), pooling):
            layers.extend(self._stage(in_channels, out_channels, pool_cfg))
        self.module = nn.Sequential(*layers)

    @staticmethod
    def _stage(in_channels, out_channels, pool_cfg):
        """Return the layers of one stage: conv, batch norm, GELU, optional pool."""
        stage = [
            nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(out_channels),
            nn.GELU(),
        ]
        if pool_cfg is not None:
            stage.append(nn.AvgPool2d(**pool_cfg))
        return stage

    def forward(self, x):
        """Run `x` through the stacked stages and return the feature map."""
        features = self.module(x)
        return features


# Shape check on the random batch `image`; for the 224x224 input this variant
# yields a 112x112 map with 512 channels.
CNN(input_channels=3)(image).shape

torch.Size([3, 512, 112, 112])

torch.Size([3, 512, 109, 109])