## Import

In [1]:
import torch
import torch.nn as nn
import pandas as pd
import torchvision.transforms as transforms
import torchvision.models as models
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms.functional as F
import torch.optim as optim
from torch.cuda.amp import GradScaler

from PIL import Image
from tqdm import tqdm
import os
import numpy as np
import random
import warnings
import timm
from lion_pytorch import Lion
warnings.filterwarnings(action='ignore') 

In [2]:
#model_names = timm.list_models(pretrained=True)
model_names = timm.list_models('*coat*',pretrained=True)

for i in model_names:
    print(i)

coat_lite_medium.in1k
coat_lite_medium_384.in1k
coat_lite_mini.in1k
coat_lite_small.in1k
coat_lite_tiny.in1k
coat_mini.in1k
coat_small.in1k
coat_tiny.in1k
coatnet_0_rw_224.sw_in1k
coatnet_1_rw_224.sw_in1k
coatnet_2_rw_224.sw_in12k
coatnet_2_rw_224.sw_in12k_ft_in1k
coatnet_3_rw_224.sw_in12k
coatnet_bn_0_rw_224.sw_in1k
coatnet_nano_rw_224.sw_in1k
coatnet_rmlp_1_rw2_224.sw_in12k
coatnet_rmlp_1_rw2_224.sw_in12k_ft_in1k
coatnet_rmlp_1_rw_224.sw_in1k
coatnet_rmlp_2_rw_224.sw_in1k
coatnet_rmlp_2_rw_224.sw_in12k
coatnet_rmlp_2_rw_224.sw_in12k_ft_in1k
coatnet_rmlp_2_rw_384.sw_in12k_ft_in1k
coatnet_rmlp_nano_rw_224.sw_in1k
coatnext_nano_rw_224.sw_in1k


In [3]:
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
device

device(type='cuda', index=0)

In [4]:
print(torch.__version__)

2.0.1+cu117


## Hyperparameter Settings

In [5]:
CFG = {
    'IMG_SIZE':224,
    'EPOCHS': 25, #Your Epochs,
    'LR':0.0001, #Your Learning Rate,
    'BATCH_SIZE': 16, #Your Batch Size,
    'SEED':41
}

## Fixed Random-Seed

In [6]:
def seed_everything(seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = True

seed_everything(CFG['SEED']) # Seed 고정

## Custom Dataset

In [7]:
class CustomDataset(Dataset):
    def __init__(self, dataframe, transform=None):
        self.dataframe = dataframe
        self.transform = transform
        
    def __len__(self):
        return len(self.dataframe)
    
    def __getitem__(self, idx):
        img_path = self.dataframe.iloc[idx]['img_path']
        img = Image.open(img_path).convert('RGB')
        
        if self.transform:
            img = self.transform(img)
        
        # mos column 존재 여부에 따라 값을 설정
        mos = float(self.dataframe.iloc[idx]['mos']) if 'mos' in self.dataframe.columns else 0.0
        
        return img, mos

## Define Model

In [8]:
class BaseModel(nn.Module):
    def __init__(self):
        super(BaseModel, self).__init__()

        # Image feature extraction using ResNet50
        self.cnn_backbone = timm.create_model('coat_lite_tiny.in1k',pretrained=True).to(device)
        # Remove the last fully connected layer to get features
        modules = list(self.cnn_backbone.children())[:-1]
        self.cnn = nn.Sequential(*modules)

        # Image quality assessment head
        #self.layer = nn.Linear(50176,2048)
        self.regression_head = nn.Linear(1000, 1)  # ResNet50 last layer has 2048 features

    def forward(self, images):
        # CNN
        features = self.cnn_backbone(images)
        #features_flat = features.view(features.size(0), -1)
        features_flat = features.reshape(features.size(0), -1)
        
        # Image quality regression
        #features_flat = self.layer(features_flat)
        mos = self.regression_head(features_flat)
        return mos

## Train

In [9]:
# 데이터 로드
train_data = pd.read_csv('../open/train.csv')

In [10]:
train_data['img_path'] = train_data['img_path'].apply(lambda x: x.replace('./','../open/'))
train_data.drop_duplicates(['img_name'],inplace=True)
train_data.reset_index(drop=True,inplace=True)
train_data

Unnamed: 0,img_name,img_path,mos,comments
0,41wy7upxzl,../open/train/41wy7upxzl.jpg,5.569231,the pink and blue really compliment each other...
1,ygujjq6xxt,../open/train/ygujjq6xxt.jpg,6.103175,love rhubarb! great colors!
2,wk321130q0,../open/train/wk321130q0.jpg,5.541985,i enjoy the textures and grungy feel to this. ...
3,w50dp2zjpg,../open/train/w50dp2zjpg.jpg,6.234848,"i like all the different colours in this pic, ..."
4,l7rqfxeuh0,../open/train/l7rqfxeuh0.jpg,5.190476,"i love these critters, just wish he was a litt..."
...,...,...,...,...
54657,zbevd0lyox,../open/train/zbevd0lyox.jpg,5.926108,"perfect balance here, in this soft serene image."
54658,w26yu6ee60,../open/train/w26yu6ee60.jpg,5.966346,very nice indeed. the sharpness and contrast a...
54659,a1pts9zzdx,../open/train/a1pts9zzdx.jpg,5.718447,nice tones and color for balance.
54660,pzbubeo03l,../open/train/pzbubeo03l.jpg,6.007843,i like the bold colors. nice sharp image.


In [11]:
# 데이터셋 및 DataLoader 생성
transform = transforms.Compose([
    transforms.RandomHorizontalFlip(p=0.3),  # Randomly apply horizontal flip with a probability of 0.3
    transforms.RandomVerticalFlip(p=0.3),    # Randomly apply vertical flip with a probability of 0.3
    transforms.Lambda(lambda img: F.resize(img, (CFG['IMG_SIZE'], CFG['IMG_SIZE']))),
    transforms.ToTensor()
])
train_dataset = CustomDataset(train_data, transform)
train_loader = DataLoader(train_dataset, batch_size=CFG['BATCH_SIZE'], shuffle=True,pin_memory=True)


# 모델, 손실함수, 옵티마이저 초기화
model = BaseModel().to(device)
criterion1 = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=CFG['LR'])
scheduler = torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(optimizer, T_0=10, 
                                                                 T_mult=1, eta_min=0.00001)
#optimizer = Lion(model.parameters(), lr=1e-4, weight_decay=1e-2)
scaler = GradScaler()

# 학습
model.train()
for epoch in range(CFG['EPOCHS']):
    total_loss = 0
    loop = tqdm(train_loader, leave=True)
    for imgs, mos in loop:
        imgs, mos = imgs.float().to(device), mos.float().to(device)

        # Forward & Loss
        predicted_mos = model(imgs)
        loss = criterion1(predicted_mos.squeeze(1), mos)

        # Backpropagation
        optimizer.zero_grad()
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()
        
        total_loss += loss.item()
        loop.set_description(f"Epoch {epoch + 1}")
        loop.set_postfix(loss=loss.item())
    
    scheduler.step()
    
    print(f"Epoch {epoch + 1} finished with average loss: {total_loss / len(train_loader):.4f}")
    
    model_checkpoint_path = f'../checkpoint2/model_checkpoint_{epoch+1}.pth'

    # 모델 가중치 저장
    torch.save(model.state_dict(), model_checkpoint_path)


Epoch 1: 100%|██████████████████████████████████████████████████████████| 3417/3417 [11:33<00:00,  4.93it/s, loss=0.27]


Epoch 1 finished with average loss: 0.5143


Epoch 2: 100%|█████████████████████████████████████████████████████████| 3417/3417 [10:50<00:00,  5.25it/s, loss=0.629]


Epoch 2 finished with average loss: 0.4060


Epoch 3: 100%|█████████████████████████████████████████████████████████| 3417/3417 [10:57<00:00,  5.20it/s, loss=0.827]


Epoch 3 finished with average loss: 0.3541


Epoch 4: 100%|█████████████████████████████████████████████████████████| 3417/3417 [10:51<00:00,  5.24it/s, loss=0.117]


Epoch 4 finished with average loss: 0.2942


Epoch 5: 100%|█████████████████████████████████████████████████████████| 3417/3417 [10:51<00:00,  5.24it/s, loss=0.338]


Epoch 5 finished with average loss: 0.2331


Epoch 6: 100%|█████████████████████████████████████████████████████████| 3417/3417 [10:46<00:00,  5.28it/s, loss=0.251]


Epoch 6 finished with average loss: 0.1739


Epoch 7: 100%|█████████████████████████████████████████████████████████| 3417/3417 [10:45<00:00,  5.29it/s, loss=0.162]


Epoch 7 finished with average loss: 0.1268


Epoch 8: 100%|█████████████████████████████████████████████████████████| 3417/3417 [10:44<00:00,  5.30it/s, loss=0.061]


Epoch 8 finished with average loss: 0.0908


Epoch 9: 100%|████████████████████████████████████████████████████████| 3417/3417 [10:46<00:00,  5.29it/s, loss=0.0603]


Epoch 9 finished with average loss: 0.0670


Epoch 10: 100%|████████████████████████████████████████████████████████| 3417/3417 [10:45<00:00,  5.30it/s, loss=0.095]


Epoch 10 finished with average loss: 0.0523


Epoch 11: 100%|████████████████████████████████████████████████████████| 3417/3417 [11:07<00:00,  5.12it/s, loss=0.139]


Epoch 11 finished with average loss: 0.1623


Epoch 12: 100%|████████████████████████████████████████████████████████| 3417/3417 [11:07<00:00,  5.12it/s, loss=0.168]


Epoch 12 finished with average loss: 0.1425


Epoch 13: 100%|████████████████████████████████████████████████████████| 3417/3417 [11:16<00:00,  5.05it/s, loss=0.199]


Epoch 13 finished with average loss: 0.1186


Epoch 14: 100%|████████████████████████████████████████████████████████| 3417/3417 [11:23<00:00,  5.00it/s, loss=0.166]


Epoch 14 finished with average loss: 0.0976


Epoch 15: 100%|███████████████████████████████████████████████████████| 3417/3417 [11:29<00:00,  4.95it/s, loss=0.0609]


Epoch 15 finished with average loss: 0.0773


Epoch 16: 100%|████████████████████████████████████████████████████████| 3417/3417 [11:23<00:00,  5.00it/s, loss=0.152]


Epoch 16 finished with average loss: 0.0582


Epoch 17: 100%|███████████████████████████████████████████████████████| 3417/3417 [11:25<00:00,  4.98it/s, loss=0.0457]


Epoch 17 finished with average loss: 0.0425


Epoch 18: 100%|████████████████████████████████████████████████████████| 3417/3417 [11:26<00:00,  4.98it/s, loss=0.027]


Epoch 18 finished with average loss: 0.0301


Epoch 19: 100%|███████████████████████████████████████████████████████| 3417/3417 [11:25<00:00,  4.98it/s, loss=0.0446]


Epoch 19 finished with average loss: 0.0217


Epoch 20: 100%|███████████████████████████████████████████████████████| 3417/3417 [11:19<00:00,  5.03it/s, loss=0.0159]


Epoch 20 finished with average loss: 0.0166


Epoch 21: 100%|████████████████████████████████████████████████████████| 3417/3417 [11:19<00:00,  5.03it/s, loss=0.106]


Epoch 21 finished with average loss: 0.0866


Epoch 22: 100%|███████████████████████████████████████████████████████| 3417/3417 [11:21<00:00,  5.01it/s, loss=0.0425]


Epoch 22 finished with average loss: 0.0845


Epoch 23: 100%|██████████████████████████████████████████████████████████| 3417/3417 [11:22<00:00,  5.01it/s, loss=0.1]


Epoch 23 finished with average loss: 0.0704


Epoch 24: 100%|███████████████████████████████████████████████████████| 3417/3417 [11:27<00:00,  4.97it/s, loss=0.0958]


Epoch 24 finished with average loss: 0.0600


Epoch 25: 100%|███████████████████████████████████████████████████████| 3417/3417 [11:26<00:00,  4.98it/s, loss=0.0372]

Epoch 25 finished with average loss: 0.0470





In [None]:
model_checkpoint_path = f'../checkpoint/model_checkpoint_19.pth'
checkpoint = torch.load(model_checkpoint_path)
model.load_state_dict(checkpoint)

In [None]:
# 저장한 모델 가중치 불러오기
checkpoint = torch.load(model_checkpoint_path)
model = BaseModel().to(device)
model.load_state_dict(checkpoint)

# 추가 학습 설정 (예를 들어, 추가 에폭 설정)
additional_epochs = 5

# 추가 학습
model.train()
for epoch in range(additional_epochs):
    total_loss = 0
    loop = tqdm(train_loader, leave=True)
    for imgs, mos in loop:
        imgs, mos = imgs.float().to(device), mos.float().to(device)

        # Forward & Loss
        predicted_mos = model(imgs)
        loss = criterion1(predicted_mos.squeeze(1), mos)

        # Backpropagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        total_loss += loss.item()
        loop.set_description(f"Additional Epoch {epoch + 1}")
        loop.set_postfix(loss=loss.item())

    print(f"Additional Epoch {epoch + 1} finished with average loss: {total_loss / len(train_loader):.4f}")
    
    model_checkpoint_path = f'../checkpoint/model_checkpoint_{24+epoch}.pth'

    # 모델 가중치 저장
    torch.save(model.state_dict(), model_checkpoint_path)


## Inference & Submit

In [12]:
test_data = pd.read_csv('../open/test.csv')

In [13]:
test_data['img_path'] = test_data['img_path'].apply(lambda x: x.replace('./','../open/'))
test_data

Unnamed: 0,img_name,img_path
0,j00zs3u6dr,../open/test/j00zs3u6dr.jpg
1,ytv70so3zb,../open/test/ytv70so3zb.jpg
2,ia9890oozp,../open/test/ia9890oozp.jpg
3,xsj81ypx4a,../open/test/xsj81ypx4a.jpg
4,f23994ghlh,../open/test/f23994ghlh.jpg
...,...,...
13007,ya8k1hklrg,../open/test/ya8k1hklrg.jpg
13008,j59djzkvpj,../open/test/j59djzkvpj.jpg
13009,o6w8lkfdy0,../open/test/o6w8lkfdy0.jpg
13010,hq68sz9xf5,../open/test/hq68sz9xf5.jpg


In [14]:
test_dataset = CustomDataset(test_data, transform)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

model.eval()
predicted_mos_list = []

# 추론 과정
with torch.no_grad():
    for imgs, _ in tqdm(test_loader):
        imgs = imgs.float().cuda()
        mos = model(imgs)
        predicted_mos_list.append(mos)

100%|████████████████████████████████████████████████████████████████████████████████| 407/407 [01:47<00:00,  3.79it/s]


In [15]:
flattened_list = [items.item() for sublist in predicted_mos_list for items in sublist]

# 결과 출력
print(flattened_list)

[5.57431173324585, 5.9142656326293945, 5.260872840881348, 5.4693145751953125, 5.031832218170166, 5.223528861999512, 6.89091682434082, 5.155252456665039, 5.818758964538574, 5.027063846588135, 4.992257118225098, 5.063642501831055, 4.063022613525391, 4.758679389953613, 5.182940483093262, 4.544816017150879, 4.1372246742248535, 4.03920841217041, 4.470035552978516, 4.691587924957275, 5.754860877990723, 4.94820499420166, 5.818216323852539, 6.115809440612793, 5.693207740783691, 6.941743850708008, 3.5581254959106445, 5.874473571777344, 6.452884674072266, 5.161076545715332, 6.520563125610352, 5.84637451171875, 3.8581295013427734, 5.494946479797363, 5.9863104820251465, 5.87786340713501, 4.8528337478637695, 6.441972255706787, 6.013482093811035, 6.108360290527344, 5.969050884246826, 5.773075103759766, 5.64478874206543, 6.523863792419434, 4.05997371673584, 5.670224189758301, 5.756895065307617, 5.6779680252075195, 5.417461395263672, 4.89456033706665, 5.507961273193359, 4.632259368896484, 6.1089010238

In [None]:
lst = []
for i in tqdm(flattened_list):
    for j in i:
        lst.append(np.float(j))

In [16]:
sub_df = pd.read_csv('../Sub/blip-image-captioning-large_submission_1.csv')

In [17]:
sub_df['mos'] = flattened_list
sub_df

Unnamed: 0,img_name,mos,comments
0,j00zs3u6dr,5.574312,"i like the idea, but the colors are a bit too ..."
1,ytv70so3zb,5.914266,i like the bw treatment. the light is a bit ha...
2,ia9890oozp,5.260873,i like the mood of this photo. the red of the ...
3,xsj81ypx4a,5.469315,"nice detail, but the image is a little flat."
4,f23994ghlh,5.031832,"i like the idea, but the image is a little fla..."
...,...,...,...
13007,ya8k1hklrg,4.550883,i like the soft feel of this photo. the branch...
13008,j59djzkvpj,5.070721,i like the grainy effect. the image is very nice.
13009,o6w8lkfdy0,5.035899,"i like the idea, but the grainy look is a bit ..."
13010,hq68sz9xf5,5.057884,i like the colors and the blur. nice job.


In [18]:
sub_df.to_csv('../Sub/submit_v21.csv', index=False)

print("Inference completed and results saved to submit.csv.")

Inference completed and results saved to submit.csv.
