In [None]:
!pip install timm

In [None]:
import warnings
warnings.filterwarnings('ignore')

from glob import glob
import pandas as pd
import numpy as np 
from tqdm import tqdm
import cv2

import os
import timm
import random

import torch
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import torchvision.transforms as transforms
from sklearn.metrics import f1_score, accuracy_score
import time



# Use the GPU when one is available, otherwise fall back to the CPU so the
# notebook can still be executed (slowly) on machines without CUDA.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [None]:
SEED = 42


def reset_seeds(seed):
    """Seed every random number generator used in this notebook.

    Args:
        seed: Integer seed applied to Python's ``random``, NumPy and PyTorch
            (CPU and all CUDA devices).

    Side effects:
        Sets ``PYTHONHASHSEED`` in ``os.environ`` and switches cuDNN to
        deterministic mode with the autotuner disabled.
    """
    random.seed(seed)
    # Only affects interpreters started after this point (e.g. worker
    # subprocesses); the hash seed of the running process is already fixed.
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)           # CPU generator
    torch.cuda.manual_seed_all(seed)  # every CUDA device; no-op without CUDA
    # Ask cuDNN for deterministic kernels and disable the autotuner, which may
    # otherwise pick different algorithms from run to run.
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False


reset_seeds(SEED)

In [None]:
# Work from the project folder so that the relative 'train/' and 'test/' globs
# used later in the notebook resolve. The path looks like a Colab Google Drive
# mount -- adjust it when running elsewhere.
PROJECT_DIR = "/content/drive/MyDrive/DL_Project"
os.chdir(PROJECT_DIR)

In [None]:
# Collect the PNG paths of each split and sort them in place so the order is
# stable between runs.
train_png = glob('train/*.png')
train_png.sort()

test_png = glob('test/*.png')
test_png.sort()

In [None]:
# Sanity check: (number of training images, number of test images) found on disk.
(len(train_png), len(test_png))

In [None]:
# Load the training annotations; only the "label" column is used here.
train_y = pd.read_csv("/content/drive/MyDrive/DL_Project/train_df.csv")
label_names = train_y["label"]

# Map every distinct label value to a contiguous integer id, assigned in
# sorted order so the mapping is reproducible.
label_unique = {
    name: class_id
    for class_id, name in enumerate(sorted(np.unique(label_names)))
}

# Integer-encoded label for every training row, in file order.
train_labels = [label_unique[name] for name in label_names]

In [None]:
def img_load(path, size=(256, 256), grayscale=False):
    """Read an image from disk and resize it.

    Args:
        path: Path of the image file.
        size: ``(width, height)`` passed to ``cv2.resize``.
        grayscale: When True the image is decoded as a single-channel
            grayscale array; otherwise it is decoded in colour and converted
            from OpenCV's BGR channel order to RGB.

    Returns:
        The resized image as a NumPy array.

    Raises:
        FileNotFoundError: If OpenCV cannot read ``path`` (``cv2.imread``
            signals this by returning ``None`` instead of raising).
    """
    flag = cv2.IMREAD_GRAYSCALE if grayscale else cv2.IMREAD_COLOR
    img = cv2.imread(path, flag)
    if img is None:
        raise FileNotFoundError(f"Could not read image: {path}")
    if not grayscale:
        # cvtColor returns a contiguous RGB array.
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    return cv2.resize(img, size)

In [None]:
# Decode every image once up front and keep the arrays in memory.
# The training paths live in `train_png`; the previously referenced
# `train_256_png` is not defined anywhere in this notebook.
train_imgs = [img_load(m) for m in tqdm(train_png)]
test_imgs = [img_load(n) for n in tqdm(test_png)]

In [None]:
class Custom_dataset(Dataset):
    """Dataset over already-loaded images with random flips in train mode.

    Args:
        img_paths: Indexable collection of images. Despite the name, the items
            are used directly as image arrays, not as file paths.
        labels: Indexable collection aligned with ``img_paths``.
        mode: ``'train'`` enables the random flip augmentation; with any other
            value the image is returned unmodified.
    """

    def __init__(self, img_paths, labels, mode='train'):
        self.mode = mode
        self.labels = labels
        self.img_paths = img_paths

    def __len__(self):
        """Number of samples, taken from ``img_paths``."""
        return len(self.img_paths)

    def __getitem__(self, idx):
        """Return ``(image_tensor, label)`` for the sample at ``idx``."""
        img = self.img_paths[idx]

        if self.mode == 'train':
            # A single draw from {0, 1, 2}: 0 keeps the image as is, 1 reverses
            # the first axis, 2 reverses the second axis. copy() makes the
            # reversed view a standalone array again.
            choice = random.randint(0, 2)
            if choice == 1:
                img = img[::-1].copy()
            elif choice == 2:
                img = img[:, ::-1].copy()

        to_tensor = transforms.ToTensor()
        return to_tensor(img), self.labels[idx]

class Network(nn.Module):
    """Fully connected classifier: Flatten -> Linear -> ReLU -> Linear -> ReLU.

    This is a multilayer perceptron (there are no convolutional layers).

    Args:
        input_shape: Number of features per sample after flattening.
        hidden_units: Width of the hidden layer.
        output_shape: Number of output units.
    """

    def __init__(self, input_shape: int, hidden_units: int, output_shape: int):
        super().__init__()
        self.layer_stack = nn.Sequential(
            nn.Flatten(),  # (batch, ...) -> (batch, features)
            nn.Linear(in_features=input_shape, out_features=hidden_units),
            nn.ReLU(),
            nn.Linear(in_features=hidden_units, out_features=output_shape),
            # NOTE(review): this ReLU clamps the outputs to be non-negative.
            # If they are fed to nn.CrossEntropyLoss as logits, consider
            # dropping it; kept here to preserve the original architecture.
            nn.ReLU(),
        )

    def forward(self, x: torch.Tensor):
        """Run ``x`` through the layer stack and return the outputs."""
        return self.layer_stack(x)

In [None]:
torch.manual_seed(SEED)

# Network starts with nn.Flatten, so its first Linear layer needs one input per
# value of a single decoded image, and its output layer one unit per class.
# (`BaselineModel` and `class_names` are not defined in this notebook; the
# `Network` class and the `label_unique` mapping are used instead.)
baseline_model = Network(input_shape=int(np.prod(np.shape(train_imgs[0]))),
                         hidden_units=10,
                         output_shape=len(label_unique)
                         )
baseline_model.to(device)

In [None]:
# Evaluation metric used for training progress reports.
def score_function(real, pred):
    """Return the macro-averaged F1 score of ``pred`` against ``real``."""
    return f1_score(real, pred, average="macro")

In [None]:
# Dataset: wraps the complete set of samples. DataLoader: groups them into mini-batches.
batch_size = 32  # number of images per mini-batch
epochs = 50      # number of passes over the training data

# Datasets and data loaders

# Train: shuffled, with mode='train' so Custom_dataset applies its random flips.
train_dataset = Custom_dataset(img_paths=np.array(train_imgs),
                               labels=np.array(train_labels),
                               mode='train')
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

# Test: original order, no augmentation. The "tmp" strings are placeholder
# labels so the dataset can still return an (image, label) pair.
test_dataset = Custom_dataset(img_paths=np.array(test_imgs),
                              labels=np.array(["tmp"] * len(test_imgs)),
                              mode='test')
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

In [None]:
def score_function(real, pred):
    """Compute the macro-averaged F1 score for true labels ``real`` and predictions ``pred``."""
    # NOTE(review): an identical score_function is already defined in an
    # earlier cell; this definition simply rebinds the same name.
    macro_f1 = f1_score(y_true=real, y_pred=pred, average="macro")
    return macro_f1

# Network requires its layer sizes: one input per value of a flattened image
# and one output unit per class. hidden_units is a small default -- tune as needed.
model = Network(input_shape=int(np.prod(np.shape(train_imgs[0]))),
                hidden_units=10,
                output_shape=len(label_unique)).to(device)

optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)  # learning rate
criterion = nn.CrossEntropyLoss()
# Mixed precision (autocast + gradient scaling) is only enabled on CUDA.
use_amp = device.type == 'cuda'
scaler = torch.cuda.amp.GradScaler(enabled=use_amp)

best = 0  # NOTE(review): never updated in this cell
for epoch in range(epochs):
    start = time.time()
    train_loss = 0
    train_pred = []
    # Named train_true (not train_y) so the `train_y` DataFrame loaded earlier
    # in the notebook is not overwritten.
    train_true = []
    model.train()
    for batch in train_loader:
        optimizer.zero_grad()
        # The DataLoader already yields tensors; move/cast them instead of
        # re-wrapping them with torch.tensor().
        x = batch[0].to(device=device, dtype=torch.float32)
        y = batch[1].to(device=device, dtype=torch.long)
        # Forward pass and loss both run under autocast.
        with torch.cuda.amp.autocast(enabled=use_amp):
            pred = model(x)
            loss = criterion(pred, y)

        # Optimisation step: scale the loss before backward, then let the
        # scaler step the optimizer and update its scale factor.
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()

        # Running mean of the batch losses over the epoch.
        train_loss += loss.item() / len(train_loader)
        train_pred += pred.argmax(1).detach().cpu().numpy().tolist()
        train_true += y.detach().cpu().numpy().tolist()

    train_f1 = score_function(train_true, train_pred)

    TIME = time.time() - start
    # Elapsed time for this epoch / estimated time for the remaining epochs.
    print(f'epoch : {epoch+1}/{epochs}    time : {TIME:.0f}s/{TIME*(epochs-epoch-1):.0f}s')
    print(f'TRAIN    loss : {train_loss:.5f}    f1 : {train_f1:.5f}')

In [None]:
# Predict a class id for every test image with the trained model.
model.eval()
f_pred = []  # predicted class ids, in test_loader order

with torch.no_grad():
    for batch in test_loader:
        # batch[0] is already a tensor; move/cast it instead of re-wrapping it.
        x = batch[0].to(device=device, dtype=torch.float32)
        # Mixed precision is only enabled on CUDA.
        with torch.cuda.amp.autocast(enabled=(device.type == 'cuda')):
            pred = model(x)
        f_pred.extend(pred.argmax(1).detach().cpu().numpy().tolist())

# Baseline model

In [None]:
# Import tqdm for progress bar (rebinds `tqdm` to the notebook-aware variant)
from tqdm.auto import tqdm

# Set the seed
torch.manual_seed(SEED)

# The test images only carry placeholder labels, so a loss/accuracy cannot be
# computed on them. Hold out a fixed fraction of the labelled training samples
# for evaluation instead. Both subsets share the arrays already held by
# `train_dataset`; the held-out one uses mode='test' to disable augmentation.
VAL_FRACTION = 0.2
all_imgs, all_labels = train_dataset.img_paths, train_dataset.labels
shuffled_idx = np.random.RandomState(SEED).permutation(len(all_imgs))
n_val = max(1, int(len(shuffled_idx) * VAL_FRACTION))
val_idx = shuffled_idx[:n_val].tolist()
fit_idx = shuffled_idx[n_val:].tolist()

train_dataloader = DataLoader(
    torch.utils.data.Subset(Custom_dataset(all_imgs, all_labels, mode='train'), fit_idx),
    shuffle=True, batch_size=batch_size)
test_dataloader = DataLoader(
    torch.utils.data.Subset(Custom_dataset(all_imgs, all_labels, mode='test'), val_idx),
    shuffle=False, batch_size=batch_size)

# Loss and an optimizer bound to baseline_model. It gets its own name so the
# optimizer of the other model in this notebook is left untouched.
loss_fn = nn.CrossEntropyLoss()
baseline_optimizer = torch.optim.Adam(baseline_model.parameters(), lr=1e-3)

# Set the number of epochs (we'll keep this small for faster training times).
# Stored under its own name so the global `epochs` setting is not overwritten.
baseline_epochs = 3

# Start the timer
train_time_start_on_cpu = time.perf_counter()

# Create training and testing loop
for epoch in tqdm(range(baseline_epochs)):
    print(f"Epoch: {epoch}\n-------")
    ### Training
    train_loss = 0.0
    baseline_model.train()
    # Add a loop to loop through training batches
    for batch, (X, y) in enumerate(train_dataloader):
        # The model lives on `device`, so the batch has to be moved there too.
        X = X.to(device)
        y = y.to(device=device, dtype=torch.long)

        # 1. Forward pass
        y_pred = baseline_model(X)

        # 2. Calculate loss (per batch); accumulate a plain float so the
        #    autograd graph of every batch is not kept alive.
        loss = loss_fn(y_pred, y)
        train_loss += loss.item()

        # 3. Optimizer zero grad
        baseline_optimizer.zero_grad()

        # 4. Loss backward
        loss.backward()

        # 5. Optimizer step
        baseline_optimizer.step()

        # Print out how many samples have been seen
        if batch % 400 == 0:
            print(f"Looked at {batch * len(X)}/{len(train_dataloader.dataset)} samples")

    # Divide total train loss by length of train dataloader (average loss per batch per epoch)
    train_loss /= len(train_dataloader)

    ### Testing (on the held-out labelled samples)
    # Setup variables for accumulatively adding up loss and accuracy
    test_loss, test_acc = 0.0, 0.0
    baseline_model.eval()
    with torch.inference_mode():
        for X, y in test_dataloader:
            X = X.to(device)
            y = y.to(device=device, dtype=torch.long)

            # 1. Forward pass
            test_pred = baseline_model(X)

            # 2. Calculate loss (accumulatively)
            test_loss += loss_fn(test_pred, y).item()

            # 3. Calculate accuracy of this batch in percent
            test_acc += (test_pred.argmax(dim=1) == y).float().mean().item() * 100

    # Average the accumulated metrics over the number of batches
    test_loss /= len(test_dataloader)
    test_acc /= len(test_dataloader)

    ## Print out what's happening
    print(f"\nTrain loss: {train_loss:.5f} | Test loss: {test_loss:.5f}, Test acc: {test_acc:.2f}%\n")

# Calculate training time
train_time_end_on_cpu = time.perf_counter()
total_train_time_baseline_model = train_time_end_on_cpu - train_time_start_on_cpu
print(f"Train time on {next(baseline_model.parameters()).device}: {total_train_time_baseline_model:.3f} seconds")

- 데이터를 나눈 뒤 전처리를 해야 함
- 전처리한 데이터를 합친 뒤 기본 모델을 사용하여 스코어 계산