## Import

In [3]:
import torch
import torch.nn as nn
import pandas as pd
import torchvision.transforms as transforms
import torchvision.models as models
from torch.utils.data import Dataset, DataLoader
from PIL import Image
from tqdm import tqdm
import os
import numpy as np
import random
import warnings
import timm
warnings.filterwarnings(action='ignore') 

In [2]:
#model_names = timm.list_models(pretrained=True)
model_names = timm.list_models('*swin*',pretrained=True)

for i in model_names:
    print(i)

convit_base
convit_small
convit_tiny
crossvit_9_240
crossvit_9_dagger_240
crossvit_15_240
crossvit_15_dagger_240
crossvit_15_dagger_408
crossvit_18_240
crossvit_18_dagger_240
crossvit_18_dagger_408
crossvit_base_240
crossvit_small_240
crossvit_tiny_240
levit_128
levit_128s
levit_192
levit_256
levit_384
vit_base_patch8_224
vit_base_patch8_224_in21k
vit_base_patch16_224
vit_base_patch16_224_in21k
vit_base_patch16_224_miil
vit_base_patch16_224_miil_in21k
vit_base_patch16_384
vit_base_patch16_sam_224
vit_base_patch32_224
vit_base_patch32_224_in21k
vit_base_patch32_384
vit_base_patch32_sam_224
vit_base_r50_s16_224_in21k
vit_base_r50_s16_384
vit_huge_patch14_224_in21k
vit_large_patch16_224
vit_large_patch16_224_in21k
vit_large_patch16_384
vit_large_patch32_224_in21k
vit_large_patch32_384
vit_large_r50_s32_224
vit_large_r50_s32_224_in21k
vit_large_r50_s32_384
vit_small_patch16_224
vit_small_patch16_224_in21k
vit_small_patch16_384
vit_small_patch32_224
vit_small_patch32_224_in21k
vit_small_pat

In [4]:
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
device

device(type='cuda', index=0)

In [5]:
print(torch.__version__)

2.0.1+cu117


## Hyperparameter Settings

In [6]:
CFG = {
    'IMG_SIZE':224,
    'EPOCHS': 30, #Your Epochs,
    'LR':0.0001, #Your Learning Rate,
    'BATCH_SIZE': 16, #Your Batch Size,
    'SEED':41
}

## Fixed Random-Seed

In [7]:
def seed_everything(seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = True

seed_everything(CFG['SEED']) # Seed 고정

## Custom Dataset

In [8]:
class CustomDataset(Dataset):
    def __init__(self, dataframe, transform=None):
        self.dataframe = dataframe
        self.transform = transform
        
    def __len__(self):
        return len(self.dataframe)
    
    def __getitem__(self, idx):
        img_path = self.dataframe.iloc[idx]['img_path']
        img = Image.open(img_path).convert('RGB')
        
        if self.transform:
            img = self.transform(img)
        
        # mos column 존재 여부에 따라 값을 설정
        mos = float(self.dataframe.iloc[idx]['mos']) if 'mos' in self.dataframe.columns else 0.0
        
        return img, mos

## Define Model

In [13]:
class BaseModel(nn.Module):
    def __init__(self):
        super(BaseModel, self).__init__()

        # Image feature extraction using ResNet50
        self.cnn_backbone = timm.create_model('vit_base_patch16_224_in21k',pretrained=True).to(device)
        # Remove the last fully connected layer to get features
        modules = list(self.cnn_backbone.children())[:-1]
        self.cnn = nn.Sequential(*modules)

        # Image quality assessment head
        self.layer1 = nn.Linear(327680,163840)
        self.layer2 = nn.Linear(163840,81920)
        self.layer3 = nn.Linear(81920,40910)
        self.layer4 = nn.Linear(40910,20455)
        self.layer5 = nn.Linear(20455,2048)
        self.regression_head = nn.Linear(2048, 1)  # ResNet50 last layer has 2048 features

    def forward(self, images):
        # CNN
        features = self.cnn(images)
        #features_flat = features.view(features.size(0), -1)
        features_flat = features.reshape(features.size(0), -1)
        
        # Image quality regression
        features_flat = self.layer1(features_flat)
        features_flat = self.layer2(features_flat)
        features_flat = self.layer3(features_flat)
        features_flat = self.layer4(features_flat)
        features_flat = self.layer5(features_flat)
        mos = self.regression_head(features_flat)
        return mos

## Train

In [14]:
# 데이터 로드
train_data = pd.read_csv('../open/train.csv')

In [15]:
train_data['img_path'] = train_data['img_path'].apply(lambda x: x.replace('./','../open/'))
train_data

Unnamed: 0,img_name,img_path,mos,comments
0,41wy7upxzl,../open/train/41wy7upxzl.jpg,5.569231,the pink and blue really compliment each other...
1,ygujjq6xxt,../open/train/ygujjq6xxt.jpg,6.103175,love rhubarb! great colors!
2,wk321130q0,../open/train/wk321130q0.jpg,5.541985,i enjoy the textures and grungy feel to this. ...
3,w50dp2zjpg,../open/train/w50dp2zjpg.jpg,6.234848,"i like all the different colours in this pic, ..."
4,l7rqfxeuh0,../open/train/l7rqfxeuh0.jpg,5.190476,"i love these critters, just wish he was a litt..."
...,...,...,...,...
74563,zbevd0lyox,../open/train/zbevd0lyox.jpg,5.926108,"perfect balance here, in this soft serene image."
74564,w26yu6ee60,../open/train/w26yu6ee60.jpg,5.966346,very nice indeed. the sharpness and contrast a...
74565,a1pts9zzdx,../open/train/a1pts9zzdx.jpg,5.718447,nice tones and color for balance.
74566,pzbubeo03l,../open/train/pzbubeo03l.jpg,6.007843,i like the bold colors. nice sharp image.


In [16]:
# 데이터셋 및 DataLoader 생성
transform = transforms.Compose([
    transforms.Resize((CFG['IMG_SIZE'], CFG['IMG_SIZE'])), 
    transforms.ToTensor()
])
train_dataset = CustomDataset(train_data, transform)
train_loader = DataLoader(train_dataset, batch_size=CFG['BATCH_SIZE'], shuffle=True,pin_memory=True)

# 모델, 손실함수, 옵티마이저
model = BaseModel().to(device)
criterion1 = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=CFG['LR'])

# 학습
model.train()
for epoch in range(CFG['EPOCHS']):
    total_loss = 0
    loop = tqdm(train_loader, leave=True)
    for imgs, mos in loop:
        imgs, mos = imgs.float().to(device), mos.float().to(device)

        # Forward & Loss
        predicted_mos = model(imgs)
        loss = criterion1(predicted_mos.squeeze(1), mos)

        # Backpropagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        total_loss += loss.item()
        loop.set_description(f"Epoch {epoch + 1}")
        loop.set_postfix(loss=loss.item())

    print(f"Epoch {epoch + 1} finished with average loss: {total_loss / len(train_loader):.4f}")

RuntimeError: [enforce fail at C:\actions-runner\_work\pytorch\pytorch\builder\windows\pytorch\c10\core\impl\alloc_cpu.cpp:72] data. DefaultCPUAllocator: not enough memory: you tried to allocate 214748364800 bytes.

## Inference & Submit

In [None]:
test_data = pd.read_csv('../open/test.csv')

In [None]:
test_data['img_path'] = test_data['img_path'].apply(lambda x: x.replace('./','../open/'))
test_data

In [None]:
test_dataset = CustomDataset(test_data, transform)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

model.eval()
predicted_mos_list = []

# 추론 과정
with torch.no_grad():
    for imgs, _ in tqdm(test_loader):
        imgs = imgs.float().cuda()
        mos = model(imgs)
        predicted_mos_list.append(mos)

In [None]:
lst = []
for i in tqdm(predicted_mos_list):
    for j in i:
        lst.append(np.float(j))

In [None]:
sub_df = pd.read_csv('../Sub/submission_v2.csv')

In [None]:
sub_df['mos'] = lst
sub_df

In [None]:
sub_df.to_csv('../Sub/submit_v.csv', index=False)

print("Inference completed and results saved to submit.csv.")