<a href="https://colab.research.google.com/github/zbooster/Antenna-Performance-Prediction/blob/main/Antenna_Resnet_34.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# 0. Colab 기본 준비단계
- Outlier를 제거하기 위해 사용할 statsmodels을 업그레이드 한다. (Runtime restart 필요)

In [None]:
!pip install statsmodels --upgrade

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting statsmodels
  Downloading statsmodels-0.13.2-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (9.8 MB)
[K     |████████████████████████████████| 9.8 MB 4.1 MB/s 
Installing collected packages: statsmodels
  Attempting uninstall: statsmodels
    Found existing installation: statsmodels 0.12.2
    Uninstalling statsmodels-0.12.2:
      Successfully uninstalled statsmodels-0.12.2
Successfully installed statsmodels-0.13.2


# 1.데이터 준비하기

In [None]:
import os
import gdown
import glob
import pandas as pd
from collections import defaultdict

# Google Drive file id of the dataset archive and the local path it is saved to.
# (Named `file_id` rather than `id` so the builtin `id()` is not shadowed.)
file_id = "10Hpa4YM0KX_Ig0W9w7DbTdq62nF2UThA"
output = "./open.zip"
# Single location used for both the "already extracted?" check and the CSV scan,
# so the two can never point at different directories.
data_dir = "./datasets"

# Download and unpack only when the dataset directory does not exist yet.
if not os.path.isdir(data_dir):
  gdown.download(id=file_id, output=output)
  gdown.extractall(path=output, to=data_dir)

# Maps CSV base name (without extension) -> DataFrame.
raw_data = defaultdict(pd.DataFrame)

# Sorted so that the load order is deterministic across runs.
csv_paths = sorted(glob.glob(os.path.join(data_dir, '**', '*.csv'), recursive=True))
if not csv_paths:
  # Fail here instead of letting the defaultdict hand out empty frames later.
  raise FileNotFoundError("No CSV files found under %s" % os.path.abspath(data_dir))

for fname in csv_paths:
    df_name = os.path.splitext(os.path.basename(fname))[0]
    raw_data[df_name] = pd.read_csv(fname)

# 2.Weights & Biases 설정
하이퍼파라미터 튜닝을 위해 Weights & Biases에 로그인하여 데이터를 쌓고 Sweeps를 활용.

#### Package install

In [None]:
!pip install wandb -Uq

#### Login

In [None]:
import wandb

wandb.login()

True

#### Sweep config

method: 모든 파라미터를 다 테스트하기 위해 grid를 선택했다.

In [None]:
sweep_config = {
    'method': 'grid'
}

metric: 검증(Validation) 데이터셋의 V-Score(NRMSE 기반 손실, 낮을수록 좋음)를 최소화(minimize)하는 것으로 선택했다.

In [None]:
# Sweep objective: 'V-Score' is the validation loss logged by run_train,
# so lower is better. The direction is stated explicitly rather than implied.
metric = {
    'name': 'V-Score',
    'goal': 'minimize',
    }

sweep_config['metric'] = metric

parameters_dict: 파라미터 변화에 따른 V-Score를 살펴보기 위한 탐색 공간이다. 현재는 fold만 6개 값으로 바꾸고, 나머지 파라미터는 단일 값으로 고정했다.

In [None]:
# Search space for the grid sweep. Only `fold` has more than one value, so the
# grid consists of one run per fold.
parameters_dict = {
    # Index of the held-out fold; run_train passes it to select_fold.
    'fold': {
        'values': [0, 1, 2, 3, 4, 5] },
    # Mini-batch size of the training DataLoader.
    'batch_size': {
        'values': [ 512 ] },
    # Learning rate handed to build_optimizer.
    'learning_rate': {
        'values': [0.001]},
    # StepLR step_size handed to build_scheduler.
    'step_size': {
        'values': [ 7 ] },
    }

sweep_config['parameters'] = parameters_dict

In [None]:
import pprint

pprint.pprint(sweep_config)

{'method': 'grid',
 'metric': {'name': 'V-Score'},
 'parameters': {'batch_size': {'values': [512]},
                'fold': {'values': [0, 1, 2, 3, 4, 5]},
                'learning_rate': {'values': [0.001]},
                'step_size': {'values': [7]}}}


#### Initialize the Sweep

In [None]:
sweep_id = wandb.sweep(sweep_config, project="Antenna-Resnet34-1")

Create sweep with ID: dtjela50
Sweep URL: https://wandb.ai/zbooster/Antenna-Resnet34-1/sweeps/dtjela50


# 3.데이터셋(Dataset) 준비하기


## 3.1.데이터 전처리


In [None]:
import numpy as np

def split_patten(data):
  """Split `data` into consecutive segments and return their lengths.

  A new segment starts at row `r` (only rows 10 .. len(data)-10 are examined) when
  either
    * the 5 rows before `r` all have X_30 < 1.425 and the 5 rows from `r` on all
      have X_30 > 1.425, or
    * the 5 rows from `r` on all have X_26 == 2.03 and X_27 == 2.07, at least
      5 segments were already recorded, and the current segment is longer than
      1000 rows.

  Returns:
    list of int segment lengths in row order; they sum to len(data).
  """
  total_rows = len(data)
  segment_lengths = []
  segment_start = 0

  # Rows too close to either end are never considered as boundaries.
  for row in range(10, total_rows - 9):
    is_boundary = (data['X_30'][row-5:row] < 1.425).all() \
        and (data['X_30'][row:row+5] > 1.425).all()
    if not is_boundary:
      # Secondary rule, evaluated only when the X_30 rule did not fire.
      is_boundary = (data['X_26'][row:row+5] == 2.03).all() \
          and (data['X_27'][row:row+5] == 2.07).all() \
          and len(segment_lengths) >= 5 and (row - segment_start > 1000)
    if not is_boundary:
      continue
    segment_lengths.append(row - segment_start)
    segment_start = row

  # The tail after the last boundary is always emitted as the final segment.
  segment_lengths.append(total_rows - segment_start)
  return segment_lengths

def time_data(data):
  """Build cyclic position features for every row of `data`.

  For each segment length reported by split_patten, row `k` of that segment gets
  T_SIN = sin(2*pi*k/length) and T_COS = cos(2*pi*k/length).

  Returns:
    DataFrame with columns 'T_SIN' and 'T_COS', one row per input row.
  """
  sin_values, cos_values = [], []

  for period in split_patten(data):
    steps = range(period)
    sin_values += [ np.sin(2 * np.pi * step/period) for step in steps ]
    cos_values += [ np.cos(2 * np.pi * step/period) for step in steps ]

  return pd.DataFrame({'T_SIN': sin_values, 'T_COS': cos_values}).copy()
    
def preprocessing_raw(data_type):
  """Load one raw table, add cyclic time features and split it into cycles.

  Args:
    data_type: key into the module-level `raw_data` mapping (e.g. 'train').

  Returns:
    list of DataFrames, one per cycle, in row order. A cycle starts at every row
    whose T_SIN is exactly 0. A table with a single cycle yields a one-element
    list and an empty table yields an empty list.

  Raises:
    KeyError: if any of the dropped columns is missing from the table.
  """
  # Work on a copy so the cached raw table is never modified.
  data = raw_data[data_type].copy()

  # Drop columns that are not used downstream.
  data = data.drop(columns=['ID', 'X_04', 'X_23', 'X_47', 'X_48',
                            'X_50', 'X_51', 'X_52', 'X_53', 'X_54', 'X_55', 'X_56'])

  # Add features that represent the position inside a cycle.
  data = data.join(time_data(data))

  # Positional offsets of every cycle start (used with iloc below).
  cycle_starts = [pos for pos, is_start in enumerate(data['T_SIN'] == 0) if is_start]

  # Pair every start with the next start; the last cycle runs to the end of the
  # table. Building explicit bounds avoids relying on a leaked loop variable,
  # which was unbound when there were fewer than two cycle starts.
  bounds = cycle_starts + [len(data)]
  return [data.iloc[lo:hi] for lo, hi in zip(bounds[:-1], bounds[1:])]

In [None]:
data_raw_list = preprocessing_raw('train')
len(pd.concat(data_raw_list))

39607

In [None]:
def select_fold(data_list, n=5):
  """Split 12 segments into a training frame and a validation frame.

  Fold `k` (0..5) consists of segments `k` and `k+6`. Fold `n` is held out for
  validation and the other five folds are concatenated for training.

  Args:
    data_list: sequence of at least 12 DataFrames.
    n: index of the held-out fold.

  Returns:
    (train, vaild) DataFrames.
  """
  train_parts, valid_parts = [], []
  for fold in range(6):
    pair = [data_list[fold], data_list[fold+6]]
    bucket = valid_parts if fold == n else train_parts
    bucket.extend(pair)
  return pd.concat(train_parts).copy(), pd.concat(valid_parts).copy()

train, vaild = select_fold(data_raw_list, 3)

In [None]:
# Two tables of per-target numeric weights, keyed by target column name
# (Y_01 .. Y_14).
# NOTE(review): presumably these are the sigma multipliers intended for
# outlier_index / split_xy(weight=...); no visible cell reads this list, so
# confirm how (and which of the two tables) it is meant to be used.
outlier_weight = []
outlier_weight.append({
    'Y_01' : 2.1, 'Y_02' : 3.1, 'Y_03' : 2.1, 'Y_04' : 4.0, 'Y_05' : 4.0, 
    'Y_06' : 2.7, 'Y_07' : 3.9, 'Y_08' : 4.0, 'Y_09' : 4.0, 'Y_10' : 3.6,
    'Y_11' : 4.0, 'Y_12' : 4.0, 'Y_13' : 4.0, 'Y_14' : 4.0
})
outlier_weight.append({
    'Y_01' : 2.7, 'Y_02' : 2.5, 'Y_03' : 2.2, 'Y_04' : 4.0, 'Y_05' : 4.0, 
    'Y_06' : 4.0, 'Y_07' : 3.3, 'Y_08' : 4.0, 'Y_09' : 4.0, 'Y_10' : 4.0,
    'Y_11' : 4.0, 'Y_12' : 4.0, 'Y_13' : 4.0, 'Y_14' : 4.0
})

In [None]:
from statsmodels.tsa.holtwinters import ExponentialSmoothing

def outlier_index(data, sigma=3):
  """Return index labels of points that deviate too far from a smoothed fit.

  The absolute values of `data` are fitted with an additive-trend
  ExponentialSmoothing model; a point is reported when it does not lie strictly
  inside `prediction +/- sigma * std(abs(data))`.

  Args:
    data: the series to inspect (split_xy passes a single DataFrame column).
    sigma: multiplier applied to the standard deviation to get the band width.

  Returns:
    list of `data.index[0] + position` for every reported point.
  """
  # Only magnitudes are modelled; the sign of the values is discarded.
  data = data.abs().copy()
  min_index = data.index[0]
  # Half-width of the acceptance band around each prediction.
  SF = data.values.std() * sigma

  # NOTE(review): seasonal_periods is passed but no `seasonal` component is
  # requested, so it presumably has no effect -- confirm against statsmodels.
  model = ExponentialSmoothing(data, 
                               trend='additive', 
                               seasonal_periods=len(data)//6,
                               initialization_method=None)
  model = model.fit()

  # In-sample predictions covering the first through the last index label.
  prediction = model.predict(
      start=data.index[0], end=data.index[-1]
  )

  result = []
  for idx, (actual, predicted) in enumerate(zip(data.values, prediction)):
    if predicted - SF < actual < predicted + SF:
      pass
    else:
      # Position is converted to a label by offsetting from the first label.
      # NOTE(review): this assumes a contiguous integer index (the visible
      # caller resets the index first) -- confirm before using elsewhere.
      result.append(min_index + idx)

  return result

In [None]:
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import MinMaxScaler, FunctionTransformer
from sklearn.compose import ColumnTransformer

def split_xy(data, is_train=False, cname=None, weight=None):
  """Select features/targets from `data` and scale the features.

  Args:
    data: DataFrame holding the feature columns and (optionally) target columns.
    is_train: when True, the index is reset and rows reported by
      outlier_index(data[cname], sigma=weight) are dropped first.
    cname: column inspected for outliers (only read when is_train is True).
    weight: sigma passed to outlier_index (only read when is_train is True).

  Returns:
    (X, y): X is a DataFrame of scaled features with a fresh 0..n-1 index and
    columns ordered as the log-transformed columns followed by the remaining
    columns sorted by name; y holds whichever of 'Y_02' and 'Y_03' exist in
    `data` and keeps the row index of `data` (it is not reset like X).
  """
  selected_columns = ['X_07', 'X_08', 'X_09', 'X_49', 'T_COS', 'T_SIN', 
                      'X_03', 'X_05', 'X_13', 'X_19', 'X_20', 'X_21', 
                      'X_22', 'X_29', 'X_36', 'X_43']
  if is_train:
    # Reset first so that the labels returned by outlier_index match the rows.
    data = data.reset_index(drop=True).copy()
    # for c, w in zip(cname, weight):
    data = data.drop(
        index=outlier_index(data=data[cname], sigma=weight)
        ).copy()

  X = data.filter(items=selected_columns).copy()
  # y = data.filter(regex='Y').copy()
  # Only two targets are kept; the line above is the all-target variant.
  y = data.filter(items=['Y_02', 'Y_03']).copy()

  # Columns that get log1p; every other selected column is passed through.
  lst_cols = [ c for c in ['X_07', 'X_08', 'X_09', 'X_49'] if c in selected_columns ]
  std_cols = sorted(list(set(X.columns) - set(lst_cols)))

  log_transformer = Pipeline(steps=[
      ("log1p", FunctionTransformer(np.log1p))
  ])

  splitrans = ColumnTransformer(transformers=[
      ("lst", log_transformer, lst_cols),
      ("none", 'passthrough', std_cols)
  ])

  preprocessing = Pipeline(steps=[    
      ("split", splitrans),
      ('minmax', MinMaxScaler())
  ])

  # NOTE(review): the scaler is fitted on whatever frame is passed in, so
  # train / validation / test are each scaled with their own min and max --
  # confirm this is intended rather than reusing the training fit.
  X = preprocessing.fit_transform(X)
  # Column names follow the ColumnTransformer output order declared above.
  X = pd.DataFrame(data=X, columns=lst_cols+std_cols).copy()
  X = X.reset_index(drop=True).copy()
  return X, y

X_train, y_train = split_xy(train)
X_vaild, y_vaild = split_xy(vaild)

In [None]:
import torch
from torch.utils.data import Dataset

class AssemblyDataset(Dataset):
    """Map-style dataset that pairs feature rows with target rows.

    `x` and `y` are stored as given and indexed with `[idx, :]`, so both must
    support that form of indexing (e.g. numpy arrays with at least 2 dimensions).
    """

    def __init__(self, x, y):
        self.x_data, self.y_data = x, y

    def __len__(self):
        # The number of samples is taken from the feature container.
        return len(self.x_data)

    def __getitem__(self, idx):
        # Each sample is converted to a float32 tensor on access.
        features = torch.FloatTensor(self.x_data[idx, :])
        targets = torch.FloatTensor(self.y_data[idx, :])
        return features, targets

## 3.2.데이터로더(DataLoader)
Resnet에 Input으로 넣기 위해서는 X의 shape를 바꾸어야 함.

In [None]:
def modify_shape(x_data):
  """Turn a 2-D feature table into a stack of 4x4 feature maps.

  Every row gets 32 zeros appended and is then folded into blocks of 4x4, so a
  16-column table becomes an array of shape (rows, 3, 4, 4).

  Args:
    x_data: DataFrame of features (its `.values` are used).

  Returns:
    A new numpy array of shape (rows, (columns + 32) / 16, 4, 4).
  """
  features = x_data.values
  # Pad the feature axis only; the row axis is left untouched.
  padded = np.pad(features, ((0, 0), (0, 32)), mode='constant', constant_values=0)
  return padded.reshape(len(padded), -1, 4, 4).copy()

In [None]:
from torch.utils.data import DataLoader
train_dataset = AssemblyDataset(modify_shape(X_train), y_train.values)
vaild_dataset = AssemblyDataset(modify_shape(X_vaild), y_vaild.values)

# 4.모델
* torchvision의 Resnet34 구조를 가져와 사용한다. (현재 코드는 사전학습 Weights를 불러오지 않고 처음부터 학습하며, 마지막 fc 층만 출력 크기에 맞게 교체한다.)
* device는 GPU를 사용한다.

In [None]:
from torchvision.models import resnet34
import torch.nn as nn

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

def build_network(output_size):
    """Create a ResNet-34 whose final layer emits `output_size` values.

    resnet34() is called without arguments, so no weights are requested. The
    fully connected head is replaced by a Linear layer with the same input
    width, and the model is moved to the module-level `device`.
    """
    model = resnet34()
    model.fc = nn.Linear(model.fc.in_features, output_size)
    return model.to(device)

# 5.옵티마이저와 스케쥴러
* 옵티마이저는 AdamW를 사용한다.
* 기존 Resnet의 Weight를 얼마나 재사용할지는 현재 코드에 반영되어 있지 않으며, 네트워크의 모든 파라미터를 학습한다.

In [None]:
import torch.optim as optim

def build_optimizer(network, learning_rate):
    """Return an AdamW optimizer over every parameter of `network`."""
    return optim.AdamW(network.parameters(), lr=learning_rate)

* 스케쥴러는 StepLR을 step_size와 gamma를 변경해가며 테스트할 수 있도록 작성했다.


In [None]:
from torch.optim import lr_scheduler

def build_scheduler(optimizer, step_size, gamma):
    """Return a StepLR scheduler for `optimizer`.

    The learning rate is multiplied by `gamma` every `step_size` scheduler steps.
    """
    return lr_scheduler.StepLR(optimizer, step_size=step_size, gamma=gamma)

# 6.학습/검증 단계(Epoch)

In [None]:
class NRMSE(nn.Module):
  """Weighted sum of per-target normalised RMSE values.

  For each of the first `n_targets` columns the RMSE between prediction and
  ground truth is divided by the mean absolute ground-truth value; the
  per-column results are summed and multiplied by `weight`.

  Args:
    n_targets: number of leading columns that are scored (default 2).
    weight: factor applied to the summed NRMSE (default 1.2).

  The defaults reproduce the previous hard-coded behaviour. The earlier
  14-target variant (kept only as commented-out code) weighted the first 8
  targets with 1.2 and the remaining 6 with 1.0.
  """
  def __init__(self, n_targets=2, weight=1.2):
    super(NRMSE, self).__init__()
    # MSELoss holds no parameters or buffers, so it needs no device placement.
    self.mse = nn.MSELoss()
    self.n_targets = n_targets
    self.weight = weight

  def forward(self, gt, preds):
    """Return the score as a 0-dim tensor on the device of the inputs.

    Args:
      gt: ground-truth tensor of shape (batch, >= n_targets).
      preds: prediction tensor of shape (batch, >= n_targets); extra columns
        are ignored.
    """
    # Collect the per-column terms in a list instead of writing them into a
    # pre-allocated CPU tensor, so the result stays on the inputs' device.
    per_target = [
        torch.sqrt(self.mse(preds[:, idx], gt[:, idx])) / torch.mean(torch.abs(gt[:, idx]))
        for idx in range(self.n_targets)
    ]
    score = self.weight * sum(per_target)
    return score

In [None]:
critetion = NRMSE().to(device)

## 6.1.학습(Train) 단계

In [None]:
def train_epoch(network, loader, optimizer, scheduler):
    """Run one training epoch and return the mean batch loss.

    Args:
        network: model to train; it is switched to train mode.
        loader: iterable of (data, target) batches.
        optimizer: optimizer stepped once per batch.
        scheduler: learning-rate scheduler stepped once at the end of the epoch.

    Returns:
        The average of the per-batch loss values as a Python float.
    """
    network.train()
    running_loss = 0.0

    for data, target in loader:
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()

        with torch.set_grad_enabled(True):
          # Forward pass
          outputs = network(data)
          loss = critetion(target, outputs)

          # Backward pass + weight update
          loss.backward()
          optimizer.step()

        # Accumulate a plain float: adding the loss tensor itself would keep
        # autograd history alive for every batch of the epoch.
        running_loss += loss.item()

    scheduler.step()

    epoch_loss = running_loss / len(loader)

    return epoch_loss

## 6.2.검증(Validation) 단계

In [None]:
def validation_epoch(network, loader, optimizer):
    """Evaluate `network` on `loader` and return the mean batch loss.

    Args:
        network: model to evaluate; it is switched to eval mode.
        loader: iterable of (data, target) batches.
        optimizer: unused; kept so that existing callers keep working.

    Returns:
        The average of the per-batch loss values as a Python float.
    """
    network.eval()

    running_loss = 0.0

    # No gradients are needed for evaluation, so autograd is disabled to avoid
    # building a graph for every batch.
    with torch.no_grad():
        for data, target in loader:
            data, target = data.to(device), target.to(device)

            # Forward pass
            outputs = network(data)
            loss = critetion(target, outputs)

            running_loss += loss.item()

    epoch_loss = running_loss / len(loader)

    return epoch_loss

# 7.수행

In [None]:
import time

def run_train(config=None):
    """Train and validate one sweep configuration, logging every epoch to W&B.

    Args:
        config: optional configuration passed to wandb.init; when the function
            is started by wandb.agent the values come from the sweep controller.
            The keys read are learning_rate, step_size, fold and batch_size.
    """
    # Initialize a new wandb run
    with wandb.init(config=config):
        # If called by wandb.agent, as below,
        # this config will be set by Sweep Controller
        config = wandb.config

        epochs = 30
        network = build_network(2)
        # network = build_network(14)
        optimizer = build_optimizer(network, config.learning_rate)
        scheduler = build_scheduler(optimizer, config.step_size, 0.1)

        # Hold out the configured fold and build both datasets from it.
        train, vaild = select_fold(data_raw_list, config.fold)
        X_train, y_train = split_xy(train)
        X_vaild, y_vaild = split_xy(vaild)
        train_dataset = AssemblyDataset(modify_shape(X_train), y_train.values)
        vaild_dataset = AssemblyDataset(modify_shape(X_vaild), y_vaild.values)
        tr_dataloader = DataLoader(train_dataset, batch_size=config.batch_size, shuffle=True)
        # The validation loader must iterate the held-out fold. It previously
        # wrapped train_dataset, so V-Score was measured on training data.
        va_dataloader = DataLoader(vaild_dataset, batch_size=len(vaild_dataset), shuffle=False)
        for epoch in range(epochs):
            since = time.time()

            train_score = train_epoch(network, tr_dataloader, optimizer, scheduler)
            print("Epoch: %4d, Train score: %.4f" % (epoch+1, train_score), end='')
            vaild_score = validation_epoch(network, va_dataloader, optimizer)
            print(", Validation score: %.4f" % (vaild_score), end='')
            time_elapsed = time.time() - since
            print(", Time Elapsed(s): %.2f" % (time_elapsed))
            wandb.log({'Epoch': epoch+1, 'T-Score': train_score, 'V-Score': vaild_score})

In [None]:
wandb.agent(sweep_id, run_train)

[34m[1mwandb[0m: Agent Starting Run: 7w8witwf with config:
[34m[1mwandb[0m: 	batch_size: 512
[34m[1mwandb[0m: 	fold: 0
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	step_size: 7


Epoch:    1, Train score: 1.1950, Validation score: 0.9992, Time Elapsed(s): 8.35
Epoch:    2, Train score: 0.9020, Validation score: 0.8955, Time Elapsed(s): 8.28
Epoch:    3, Train score: 0.8786, Validation score: 0.8927, Time Elapsed(s): 8.37
Epoch:    4, Train score: 0.8646, Validation score: 0.8668, Time Elapsed(s): 8.36
Epoch:    5, Train score: 0.8590, Validation score: 0.8611, Time Elapsed(s): 8.26
Epoch:    6, Train score: 0.8680, Validation score: 0.8725, Time Elapsed(s): 8.42
Epoch:    7, Train score: 0.8537, Validation score: 0.8615, Time Elapsed(s): 8.23
Epoch:    8, Train score: 0.8345, Validation score: 0.8454, Time Elapsed(s): 8.42
Epoch:    9, Train score: 0.8267, Validation score: 0.8388, Time Elapsed(s): 8.36
Epoch:   10, Train score: 0.8233, Validation score: 0.8331, Time Elapsed(s): 8.24
Epoch:   11, Train score: 0.8196, Validation score: 0.8289, Time Elapsed(s): 8.39
Epoch:   12, Train score: 0.8152, Validation score: 0.8245, Time Elapsed(s): 8.26
Epoch:   13, Tra

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
Epoch,▁▁▁▂▂▂▂▃▃▃▃▄▄▄▄▅▅▅▅▆▆▆▆▇▇▇▇███
T-Score,█▃▃▂▂▃▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
V-Score,█▅▅▄▄▄▄▃▃▃▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
Epoch,30.0
T-Score,0.77632
V-Score,0.78423


[34m[1mwandb[0m: Agent Starting Run: btct7mno with config:
[34m[1mwandb[0m: 	batch_size: 512
[34m[1mwandb[0m: 	fold: 1
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	step_size: 7


Epoch:    1, Train score: 1.2125, Validation score: 1.1324, Time Elapsed(s): 8.22
Epoch:    2, Train score: 0.9176, Validation score: 0.9190, Time Elapsed(s): 8.31
Epoch:    3, Train score: 0.8952, Validation score: 0.8836, Time Elapsed(s): 8.30
Epoch:    4, Train score: 0.8847, Validation score: 0.8968, Time Elapsed(s): 8.19
Epoch:    5, Train score: 0.8681, Validation score: 0.8677, Time Elapsed(s): 8.31
Epoch:    6, Train score: 0.8647, Validation score: 0.8636, Time Elapsed(s): 8.20
Epoch:    7, Train score: 0.8611, Validation score: 0.8577, Time Elapsed(s): 8.33
Epoch:    8, Train score: 0.8406, Validation score: 0.8450, Time Elapsed(s): 8.24
Epoch:    9, Train score: 0.8367, Validation score: 0.8410, Time Elapsed(s): 8.34
Epoch:   10, Train score: 0.8337, Validation score: 0.8367, Time Elapsed(s): 8.25
Epoch:   11, Train score: 0.8304, Validation score: 0.8332, Time Elapsed(s): 8.35
Epoch:   12, Train score: 0.8270, Validation score: 0.8295, Time Elapsed(s): 8.34
Epoch:   13, Tra

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
Epoch,▁▁▁▂▂▂▂▃▃▃▃▄▄▄▄▅▅▅▅▆▆▆▆▇▇▇▇███
T-Score,█▃▃▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
V-Score,█▃▃▃▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
Epoch,30.0
T-Score,0.79944
V-Score,0.80391


[34m[1mwandb[0m: Agent Starting Run: l57f8dgo with config:
[34m[1mwandb[0m: 	batch_size: 512
[34m[1mwandb[0m: 	fold: 2
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	step_size: 7


Epoch:    1, Train score: 1.1869, Validation score: 0.9654, Time Elapsed(s): 8.30
Epoch:    2, Train score: 0.8967, Validation score: 0.9205, Time Elapsed(s): 8.14
Epoch:    3, Train score: 0.8863, Validation score: 0.8635, Time Elapsed(s): 8.26
Epoch:    4, Train score: 0.8683, Validation score: 0.8674, Time Elapsed(s): 8.15
Epoch:    5, Train score: 0.8739, Validation score: 0.8487, Time Elapsed(s): 8.23
Epoch:    6, Train score: 0.8572, Validation score: 0.8474, Time Elapsed(s): 8.15
Epoch:    7, Train score: 0.8575, Validation score: 0.8445, Time Elapsed(s): 8.28
Epoch:    8, Train score: 0.8448, Validation score: 0.8343, Time Elapsed(s): 8.27
Epoch:    9, Train score: 0.8353, Validation score: 0.8299, Time Elapsed(s): 8.15
Epoch:   10, Train score: 0.8326, Validation score: 0.8278, Time Elapsed(s): 8.26
Epoch:   11, Train score: 0.8293, Validation score: 0.8245, Time Elapsed(s): 8.24
Epoch:   12, Train score: 0.8293, Validation score: 0.8207, Time Elapsed(s): 8.26
Epoch:   13, Tra

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
Epoch,▁▁▁▂▂▂▂▃▃▃▃▄▄▄▄▅▅▅▅▆▆▆▆▇▇▇▇███
T-Score,█▃▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
V-Score,█▆▄▄▃▃▃▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
Epoch,30.0
T-Score,0.80744
V-Score,0.80109


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: e4e66zt8 with config:
[34m[1mwandb[0m: 	batch_size: 512
[34m[1mwandb[0m: 	fold: 3
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	step_size: 7


Epoch:    1, Train score: 1.1790, Validation score: 0.9158, Time Elapsed(s): 8.27
Epoch:    2, Train score: 0.8847, Validation score: 0.8819, Time Elapsed(s): 8.35
Epoch:    3, Train score: 0.8724, Validation score: 0.8773, Time Elapsed(s): 8.27
Epoch:    4, Train score: 0.8618, Validation score: 0.8757, Time Elapsed(s): 8.40
Epoch:    5, Train score: 0.8593, Validation score: 0.9018, Time Elapsed(s): 8.47
Epoch:    6, Train score: 0.8570, Validation score: 0.8608, Time Elapsed(s): 8.27
Epoch:    7, Train score: 0.8555, Validation score: 0.8634, Time Elapsed(s): 8.35
Epoch:    8, Train score: 0.8352, Validation score: 0.8484, Time Elapsed(s): 8.27
Epoch:    9, Train score: 0.8299, Validation score: 0.8418, Time Elapsed(s): 8.40
Epoch:   10, Train score: 0.8259, Validation score: 0.8376, Time Elapsed(s): 8.39
Epoch:   11, Train score: 0.8218, Validation score: 0.8325, Time Elapsed(s): 8.31
Epoch:   12, Train score: 0.8179, Validation score: 0.8240, Time Elapsed(s): 8.38
Epoch:   13, Tra

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
Epoch,▁▁▁▂▂▂▂▃▃▃▃▄▄▄▄▅▅▅▅▆▆▆▆▇▇▇▇███
T-Score,█▃▃▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
V-Score,█▆▆▆▇▅▅▄▄▄▄▃▃▃▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁

0,1
Epoch,30.0
T-Score,0.77566
V-Score,0.78167


[34m[1mwandb[0m: Agent Starting Run: bvo76nhr with config:
[34m[1mwandb[0m: 	batch_size: 512
[34m[1mwandb[0m: 	fold: 4
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	step_size: 7


Epoch:    1, Train score: 1.2057, Validation score: 0.9247, Time Elapsed(s): 8.43
Epoch:    2, Train score: 0.9075, Validation score: 0.8905, Time Elapsed(s): 8.29
Epoch:    3, Train score: 0.8741, Validation score: 0.8637, Time Elapsed(s): 8.39
Epoch:    4, Train score: 0.8643, Validation score: 0.8487, Time Elapsed(s): 8.38
Epoch:    5, Train score: 0.8617, Validation score: 0.8465, Time Elapsed(s): 8.28
Epoch:    6, Train score: 0.8662, Validation score: 0.8713, Time Elapsed(s): 8.38
Epoch:    7, Train score: 0.8600, Validation score: 0.8461, Time Elapsed(s): 8.28
Epoch:    8, Train score: 0.8381, Validation score: 0.8270, Time Elapsed(s): 8.38
Epoch:    9, Train score: 0.8281, Validation score: 0.8188, Time Elapsed(s): 8.29
Epoch:   10, Train score: 0.8226, Validation score: 0.8118, Time Elapsed(s): 8.38
Epoch:   11, Train score: 0.8193, Validation score: 0.8073, Time Elapsed(s): 8.38
Epoch:   12, Train score: 0.8146, Validation score: 0.7978, Time Elapsed(s): 8.29
Epoch:   13, Tra

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
Epoch,▁▁▁▂▂▂▂▃▃▃▃▄▄▄▄▅▅▅▅▆▆▆▆▇▇▇▇███
T-Score,█▃▃▃▃▃▃▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
V-Score,█▇▆▅▅▆▅▄▄▃▃▃▃▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
Epoch,30.0
T-Score,0.76434
V-Score,0.74978


[34m[1mwandb[0m: Agent Starting Run: dr8j35q5 with config:
[34m[1mwandb[0m: 	batch_size: 512
[34m[1mwandb[0m: 	fold: 5
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	step_size: 7


Epoch:    1, Train score: 1.2155, Validation score: 0.9646, Time Elapsed(s): 8.44
Epoch:    2, Train score: 0.9069, Validation score: 0.8914, Time Elapsed(s): 8.51
Epoch:    3, Train score: 0.8813, Validation score: 0.8728, Time Elapsed(s): 8.53
Epoch:    4, Train score: 0.8745, Validation score: 0.8694, Time Elapsed(s): 8.44
Epoch:    5, Train score: 0.8652, Validation score: 0.8854, Time Elapsed(s): 8.51
Epoch:    6, Train score: 0.8610, Validation score: 0.8431, Time Elapsed(s): 8.41
Epoch:    7, Train score: 0.8532, Validation score: 0.9033, Time Elapsed(s): 8.54
Epoch:    8, Train score: 0.8410, Validation score: 0.8284, Time Elapsed(s): 8.43
Epoch:    9, Train score: 0.8295, Validation score: 0.8216, Time Elapsed(s): 8.54
Epoch:   10, Train score: 0.8290, Validation score: 0.8167, Time Elapsed(s): 8.41
Epoch:   11, Train score: 0.8226, Validation score: 0.8108, Time Elapsed(s): 8.52
Epoch:   12, Train score: 0.8185, Validation score: 0.8045, Time Elapsed(s): 8.54
Epoch:   13, Tra

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
Epoch,▁▁▁▂▂▂▂▃▃▃▃▄▄▄▄▅▅▅▅▆▆▆▆▇▇▇▇███
T-Score,█▃▃▃▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
V-Score,█▅▅▅▅▄▆▃▃▃▃▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
Epoch,30.0
T-Score,0.77622
V-Score,0.76395


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Sweep Agent: Exiting.


In [None]:
raise AssertionError

AssertionError: ignored

In [None]:
# Final training run on all cycles (no validation split), using fixed
# hyper-parameters instead of the sweep configuration.
# NOTE(review): `time` is imported in the cell that defines run_train, so that
# cell has to be executed before this one.
epochs = 30
# NOTE(review): the network has 14 outputs, but split_xy returns only the
# 'Y_02' and 'Y_03' targets and NRMSE scores only the first two columns, so
# outputs 2..13 never contribute to the loss -- confirm the intended target set.
network = build_network(14)
optimizer = build_optimizer(network, 0.001)
scheduler = build_scheduler(optimizer, 7, 0.1)

# Rebinds the module-level `train` to the concatenation of every cycle.
train = pd.concat(data_raw_list)
# Rows reported as outliers for 'Y_03' (sigma 2.2) are dropped before training.
X_train, y_train = split_xy(train, is_train=True, cname='Y_03', weight=2.2)
train_dataset = AssemblyDataset(modify_shape(X_train), y_train.values)
tr_dataloader = DataLoader(train_dataset, batch_size=512, shuffle=True)
for epoch in range(epochs):
    since = time.time()
    
    train_score = train_epoch(network, tr_dataloader, optimizer, scheduler)
    print("Epoch: %4d, Train score: %.4f" % (epoch+1, train_score), end='')
    time_elapsed = time.time() - since
    print(", Time Elapsed(s): %.2f" % (time_elapsed))
    # wandb.log({'Epoch': epoch+1, 'T-Score': train_score, 'V-Score': vaild_score})

In [None]:
test_raw_list = preprocessing_raw('test')
test = pd.concat(test_raw_list)
len(test)

In [None]:
X_test, _ = split_xy(test)
print(len(X_test))
X_test.head()

In [None]:
X_test.tail()

In [None]:
X_test.info()

In [None]:
nx = modify_shape(X_test)
nx = torch.FloatTensor(nx)
nx.shape

In [None]:
nx = nx.to(device)
# train_epoch leaves the network in train mode; switch to eval mode so layers
# such as BatchNorm use their running statistics, and disable autograd because
# no gradients are needed for prediction.
network.eval()
with torch.no_grad():
  summit = network(nx)

In [None]:
preds = summit.detach().cpu().numpy()

submit = raw_data['sample_submission'].copy()
# Output column k of the network was trained against y_train.columns[k], so the
# predictions are written by target name. Mapping by position in the submission
# file would put them under the wrong targets whenever y_train holds only a
# subset of the Y columns. Targets that were not trained keep the values from
# the sample submission.
for out_idx, col in enumerate(y_train.columns):
  submit[col] = preds[:, out_idx]
print('Done.')

# Written relative to the working directory, which is where the following
# cells read the file from.
submit.to_csv('./submit.csv', index=False)
print('Done.')

In [None]:
!cp './submit.csv' '/content/drive/MyDrive/DeepLearning_Datas'

In [None]:
pd.read_csv('./submit.csv')

In [None]:
# preds can have more output columns than there are trained targets; keep only
# the leading columns that correspond to y_train.columns so the shapes agree.
tmp = pd.DataFrame(preds[:, :len(y_train.columns)], columns=y_train.columns)
tmp

* Sweeps 에이전트가 파라미터 값을 바꿔가면서 반복 수행함. 결과는 저장

In [None]:
tmp.info()