<a href="https://colab.research.google.com/github/zbooster/Antenna-Performance-Prediction/blob/main/Antenna_Resnet34_Init.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# 0.Colab 기본 준비단계

# 1.데이터 준비하기

In [2]:
import os
import gdown
import glob
import pandas as pd
from collections import defaultdict

id = "10Hpa4YM0KX_Ig0W9w7DbTdq62nF2UThA"
output = "./open.zip"

if not os.path.isdir('./datasets'):
  gdown.download(id=id, output=output)
  gdown.extractall(path=output, to='./datasets')

raw_data = defaultdict(pd.DataFrame)

for fname in glob.glob('/content/datasets/**/*.csv', recursive=True):
    df_name = os.path.splitext(os.path.basename(fname))[0]
    raw_data[df_name] = pd.read_csv(fname)

Downloading...
From: https://drive.google.com/uc?id=10Hpa4YM0KX_Ig0W9w7DbTdq62nF2UThA
To: /content/open.zip
100%|██████████| 10.0M/10.0M [00:00<00:00, 211MB/s]


# 2.데이터 분석
(작성중...)

# 3.데이터셋(Dataset) 준비하기


## 3.1.테스트데이터 분리

In [4]:
train = raw_data['train'].copy()
len(train)

39607

In [6]:
from sklearn.model_selection import train_test_split

X = train.filter(regex='X').copy()
y = train.filter(regex='Y').copy()

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, shuffle=True, random_state=42)
len(X_train), len(X_test)

(26536, 13071)

## 3.2.데이터셋(Datasets)만들기
* 데이터 전처리단계를 Custom datasets에 넣는다.

In [25]:
import numpy as np
import torch
from torch.utils.data import Dataset

class AssemblyDataset(Dataset): 
    def __init__(self, X, y):
        self.X_data = X.values
        self.y_data = y.values
        
        # Resnet34에 입력하기 위해 Shape을 바꿔준다.
        len_cx = len(self.X_data[0,:])
        s_size = int(np.sqrt(len_cx / 3) + 1)
        t_shape = s_size ** 2 * 3
        p_size = t_shape - len_cx
        self.X_data = np.pad(
            self.X_data, (0,p_size), 'constant', constant_values=0)[:len(self.X_data)]
        self.X_data = self.X_data.reshape(len(self.X_data), -1, s_size, s_size)

    def __len__(self): 
        return len(self.X_data)

    def __getitem__(self, idx): 
        X = torch.FloatTensor(self.X_data[idx,:])
        y = torch.FloatTensor(self.y_data[idx,:])
        return X, y

In [26]:
train_dataset = AssemblyDataset(X_train, y_train)
vaild_dataset = AssemblyDataset(X_train, y_train)

## 3.2.데이터로더(DataLoader)

In [27]:
from torch.utils.data import DataLoader

tr_dataloader = DataLoader(train_dataset, batch_size=512, shuffle=True)
va_dataloader = DataLoader(train_dataset, batch_size=len(vaild_dataset), shuffle=False)

* 원하는 모양으로 잘 나오는지 확인

In [28]:
for input, target in tr_dataloader:
    print(input.shape)
    break

torch.Size([512, 3, 5, 5])


# 4.모델
* device는 GPU를 사용한다.

In [29]:
from torchvision.models import resnet34
import torch.nn as nn

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

def build_network(output_size):
    network = resnet34()
    num_ftrs = network.fc.in_features
    network.fc = nn.Linear(num_ftrs, output_size)
    return network.to(device)

# 5.옵티마이저와 스케쥴러
* 옵티마이저는 AdamW을 사용한다.

In [30]:
import torch.optim as optim

def build_optimizer(network, learning_rate):
    optimizer = optim.AdamW(network.parameters(), lr=learning_rate)
    return optimizer

* 스케쥴러는 StepLR을 step_size와 gamma를 변경해가며 테스트할 수 있도록 작성했다.


In [31]:
from torch.optim import lr_scheduler

def build_scheduler(optimizer, step_size, gamma):
    exp_lr_scheduler = lr_scheduler.StepLR(optimizer, step_size=step_size, gamma=gamma)
    return exp_lr_scheduler

# 6.학습/검증 단계(Epoch)
* 대회에서 제공한 평가산식을 loss로 사용한다.

In [32]:
class NRMSE(nn.Module):
  def __init__(self):
    super(NRMSE, self).__init__()
    self.mse = nn.MSELoss().to(device)

  def forward(self, gt, preds):
    all_nrmse = torch.zeros(14)
    for idx in range(14):
      rmse = torch.sqrt(self.mse(preds[:,idx], gt[:,idx]))
      nrmse = rmse / torch.mean(torch.abs(gt[:,idx]))
      all_nrmse[idx] = nrmse
    score = 1.2 * torch.sum(all_nrmse[:8]) + 1.0 * torch.sum(all_nrmse[8:14])
    return score

In [33]:
critetion = NRMSE().to(device)

## 6.1.학습(Train) 단계

In [34]:
def train_epoch(network, loader, optimizer, scheduler):
    network.train()
    running_loss = 0.0

    for idx, (data, target) in enumerate(loader):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()

        with torch.set_grad_enabled(True):
          # ➡ Forward pass
          outputs = network(data)
          loss = critetion(target, outputs)

          # ⬅ Backward pass + weight update
          loss.backward()
          optimizer.step()

        running_loss += loss

    scheduler.step()
    
    epoch_loss = running_loss / len(loader)

    return epoch_loss

## 6.2.검증(Validation) 단계

In [35]:
def validation_epoch(network, loader, optimizer):
    network.eval()
    
    running_loss = 0.0

    for _, (data, target) in enumerate(loader):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()

        # ➡ Forward pass
        outputs = network(data)
        loss = critetion(target, outputs)

        running_loss += loss

    epoch_loss = running_loss  / len(loader)

    return epoch_loss

# 7.수행

In [37]:
import time

epochs = 30
network = build_network(14)
optimizer = build_optimizer(network, 0.001)
scheduler = build_scheduler(optimizer, 7, 0.1)

for epoch in range(epochs):
    since = time.time()
    
    train_score = train_epoch(network, tr_dataloader, optimizer, scheduler)
    print("Epoch: %4d, Train score: %.4f" % (epoch+1, train_score), end='')
    vaild_score = validation_epoch(network, va_dataloader, optimizer)
    print(", Validation score: %.4f" % (vaild_score), end='')
    time_elapsed = time.time() - since
    print(", Time Elapsed(s): %.2f" % (time_elapsed))

Epoch:    1, Train score: 6.7053, Validation score: 2.2447, Time Elapsed(s): 6.43
Epoch:    2, Train score: 2.1711, Validation score: 2.1362, Time Elapsed(s): 6.45
Epoch:    3, Train score: 2.1965, Validation score: 2.3728, Time Elapsed(s): 6.48
Epoch:    4, Train score: 2.1255, Validation score: 2.2671, Time Elapsed(s): 6.54
Epoch:    5, Train score: 2.1034, Validation score: 2.0560, Time Elapsed(s): 6.62
Epoch:    6, Train score: 2.0627, Validation score: 2.0456, Time Elapsed(s): 6.47
Epoch:    7, Train score: 2.0746, Validation score: 2.0401, Time Elapsed(s): 6.46
Epoch:    8, Train score: 2.0333, Validation score: 2.0199, Time Elapsed(s): 6.40
Epoch:    9, Train score: 2.0210, Validation score: 2.0184, Time Elapsed(s): 6.30
Epoch:   10, Train score: 2.0092, Validation score: 2.0130, Time Elapsed(s): 6.32
Epoch:   11, Train score: 2.0130, Validation score: 2.0139, Time Elapsed(s): 6.31
Epoch:   12, Train score: 2.0141, Validation score: 2.0134, Time Elapsed(s): 6.32
Epoch:   13, Tra