In [None]:
import os
from zipfile import ZipFile
from google.colab import drive

# Mount Google Drive so the competition archive is reachable from the runtime.
drive.mount('/content/drive')

# Drive folder that contains open.zip.
# NOTE(review): `dir` shadows the Python built-in of the same name; the name is
# kept so that any other cell relying on it keeps working.
dir = '/content/drive/MyDrive/DACON/Podiblock_Structure_Extraction_AI_Contest/'

# Directory the archive is extracted into.
base_dir = '/content/'
# os.chdir(base_dir)

# Extract the archive; the context manager closes the zip handle afterwards.
with ZipFile(os.path.join(dir, 'open.zip')) as archive:
    archive.extractall(base_dir)

# Confirm the extraction worked by listing the target directory.
os.listdir(base_dir)


Mounted at /content/drive


['.config',
 'test.csv',
 'train',
 'drive',
 'train.csv',
 'test',
 'sample_submission.csv',
 'sample_data']

## Import

In [None]:
import random
import pandas as pd
import numpy as np
import os
import cv2

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader

import albumentations as A
from albumentations.pytorch.transforms import ToTensorV2
import torchvision.models as models

from tqdm.auto import tqdm
from sklearn.metrics import accuracy_score

import warnings
warnings.filterwarnings(action='ignore') 

In [None]:
# Prefer the GPU when CUDA is usable; otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [None]:
# Show the GPU attached to this runtime (driver version, memory, utilisation).
!nvidia-smi

Sat Feb 11 02:41:08 2023       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 510.47.03    Driver Version: 510.47.03    CUDA Version: 11.6     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla T4            Off  | 00000000:00:04.0 Off |                    0 |
| N/A   50C    P0    27W /  70W |      3MiB / 15360MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

## wandb

### Setup

In [None]:
# Install the Weights & Biases client into the notebook runtime.
!pip install wandb 
# !pip install wandb -Uq

In [None]:
import wandb

# Authenticate the W&B command-line client; --relogin forces a fresh API-key
# prompt even when credentials are already stored.
# !wandb.login()
!wandb login --relogin

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
[34m[1mwandb[0m: Paste an API key from your profile and hit enter, or press ctrl+c to quit: 

In [None]:
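# API key removed: never store W&B credentials in the notebook. Supply the key via the WANDB_API_KEY environment variable or the interactive login prompt, and revoke the key that was previously pasted here.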

### Define the sweep

In [None]:
# Sweep definition; 'random' is the search method handed to W&B.
sweep_config = dict(method='random')

In [None]:
# The sweep tracks the logged 'loss' value with the goal of minimising it.
metric = dict(name='loss', goal='minimize')

sweep_config['metric'] = metric

In [None]:
# Search space, starting with the learning rate: drawn from a flat (uniform)
# distribution between 0 and 0.1.
parameters_dict = dict(
    learning_rate=dict(distribution='uniform', min=0, max=0.1),
)
sweep_config['parameters'] = parameters_dict

In [None]:
# Add the remaining hyperparameters to the search space.
parameters_dict.update(
    # Quantised log-uniform draw between 127 and 128 with quantum q=8.
    # NOTE(review): with these bounds the range is effectively a single value
    # (the logged run used batch_size 128); widen min/max if a real search over
    # batch size is intended.
    batch_size=dict(distribution='q_log_uniform_values', q=8, min=127, max=128),
    # Categorical choice between the two optimizer names build_optimizer knows.
    optimizer=dict(values=['adam', 'sgd']),
    # Fixed value shared by every run.
    epochs=dict(value=5),
)

In [None]:
import pprint

# Pretty-print the assembled sweep configuration for a visual check.
pprint.pprint(sweep_config)

{'method': 'random',
 'metric': {'goal': 'minimize', 'name': 'loss'},
 'parameters': {'batch_size': {'distribution': 'q_log_uniform_values',
                               'max': 128,
                               'min': 127,
                               'q': 8},
                'epochs': {'value': 5},
                'learning_rate': {'distribution': 'uniform',
                                  'max': 0.1,
                                  'min': 0},
                'optimizer': {'values': ['adam', 'sgd']}}}


### Initialize the Sweep

In [None]:
# Register the sweep with W&B under the given project and keep the returned id
# for the later wandb.agent call.
sweep_id = wandb.sweep(sweep_config, project="pytorch-sweeps-demo-2-test")

Create sweep with ID: oqyoubrt
Sweep URL: https://wandb.ai/dcv_block-/pytorch-sweeps-demo-2-test/sweeps/oqyoubrt


### Run your Training procedure

In [None]:
import random
import pandas as pd
import numpy as np
import os
import cv2

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader

import albumentations as A
from albumentations.pytorch.transforms import ToTensorV2
import torchvision.models as models

from tqdm.auto import tqdm
from sklearn.metrics import accuracy_score

import warnings
warnings.filterwarnings(action='ignore') 

In [None]:
import pandas as pd
import cv2
# Load the training manifest.
df = pd.read_csv('./train.csv')
# Shuffle the rows before splitting. The fixed random_state makes the split
# reproducible, so re-running this cell no longer moves samples between the
# training and validation sets.
df = df.sample(frac=1, random_state=42)
train_len = int(len(df) * 0.8)

# First 80% of the shuffled rows for training, the rest for validation.
# .copy() makes each split an independent frame rather than a view of df.
train = df.iloc[:train_len].copy()
val = df.iloc[train_len:].copy()

# Sanity check: cv2.imread returns None for an unreadable file, so accessing
# .shape fails loudly here if the first training image cannot be loaded.
cv2.imread(train['img_path'].values[0]).shape

train['img_path']

1628     ./train/TRAIN_01628.jpg
2683     ./train/TRAIN_02683.jpg
30494    ./train/TRAIN_30494.jpg
697      ./train/TRAIN_00697.jpg
4810     ./train/TRAIN_04810.jpg
                  ...           
1230     ./train/TRAIN_01230.jpg
30273    ./train/TRAIN_30273.jpg
25200    ./train/TRAIN_25200.jpg
10870    ./train/TRAIN_10870.jpg
22445    ./train/TRAIN_22445.jpg
Name: img_path, Length: 26395, dtype: object

In [None]:
def get_labels(df):
    """Return every column from the third one onward as a NumPy array.

    The first two columns are skipped (presumably the sample id and the image
    path — confirm against train.csv).
    """
    label_columns = df.iloc[:, 2:]
    return label_columns.values

# Label matrix for the training split, one row per training sample.
train_labels = get_labels(train)

In [None]:
# Quick look at the label matrix.
print(train_labels)

[[1 0 1 ... 0 0 0]
 [0 1 0 ... 1 0 0]
 [0 0 1 ... 1 0 0]
 ...
 [1 0 0 ... 1 0 0]
 [1 0 0 ... 1 1 1]
 [0 0 0 ... 0 0 0]]


In [None]:
class CustomDataset(Dataset):
    """Dataset that loads images lazily from disk with OpenCV.

    Args:
        img_path_list: Indexable collection of image file paths.
        label_list: Indexable collection of per-image label vectors, or
            ``None`` for unlabeled data.
        transforms: Optional callable invoked as ``transforms(image=...)`` that
            returns a mapping with an ``'image'`` entry.
    """

    def __init__(self, img_path_list, label_list, transforms=None):
        self.img_path_list = img_path_list
        self.label_list = label_list
        self.transforms = transforms

    def __getitem__(self, index):
        """Return ``(image, label)`` when labels exist, otherwise ``image``.

        Raises:
            FileNotFoundError: If the image at ``index`` cannot be read.
        """
        img_path = self.img_path_list[index]

        image = cv2.imread(img_path)
        # cv2.imread signals failure by returning None instead of raising;
        # fail here with the offending path rather than deep inside a transform.
        if image is None:
            raise FileNotFoundError(f"Could not read image: {img_path}")
        # OpenCV decodes to BGR; convert to RGB so the channel order matches
        # the per-channel normalisation and the pretrained backbone.
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        if self.transforms is not None:
            image = self.transforms(image=image)['image']

        if self.label_list is None:
            return image

        # Float labels are what the BCE criterion in train_epoch consumes.
        label = torch.tensor(self.label_list[index], dtype=torch.float32)
        return image, label

    def __len__(self):
        """Number of images in the dataset."""
        return len(self.img_path_list)

In [None]:
class BaseModel(nn.Module):
    """EfficientNet-B0 backbone followed by a linear head with sigmoid output.

    Args:
        num_classes: Number of output units of the linear head (default 10).
    """

    def __init__(self, num_classes=10):
        super().__init__()
        # The backbone's 1000-dimensional output feeds the linear head below.
        self.backbone = models.efficientnet_b0(pretrained=True)
        self.classifier = nn.Linear(1000, num_classes)

    def forward(self, x):
        """Return per-class values in (0, 1) for the batch ``x``."""
        features = self.backbone(x)
        logits = self.classifier(features)
        return torch.sigmoid(logits)

In [None]:
import torch
import torch.optim as optim
import torch.nn.functional as F
import torch.nn as nn
from torchvision import datasets, transforms

# Device the network and every batch are moved to: CUDA when available, else CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

def train_wow(config=None):
    """Run one W&B-tracked training job.

    Args:
        config: Optional configuration forwarded to ``wandb.init``. The values
            actually used are read back from ``wandb.config`` once the run is
            open.
    """
    with wandb.init(config=config):
        # Read the effective settings from the active run; when launched via
        # wandb.agent these are the values chosen for this run.
        run_config = wandb.config

        train_loader = build_dataset(run_config.batch_size)
        model = build_network()
        optimizer = build_optimizer(
            model,
            run_config.optimizer,
            run_config.learning_rate,
        )

        for epoch in range(run_config.epochs):
            epoch_loss = train_epoch(model, train_loader, optimizer)
            # One summary record per epoch, next to the per-batch records
            # written inside train_epoch.
            epoch_record = {"loss": epoch_loss, "epoch": epoch}
            wandb.log(epoch_record)

In [None]:
def build_dataset(batch_size):
    """Create the shuffled training DataLoader.

    Reads the module-level ``train`` frame and ``train_labels`` array.

    Args:
        batch_size: Number of samples per batch.

    Returns:
        A ``DataLoader`` over the training ``CustomDataset``.
    """
    side = 224
    channel_mean = (0.485, 0.456, 0.406)
    channel_std = (0.229, 0.224, 0.225)

    # Resize to a square, normalise per channel, then convert to a tensor.
    steps = [
        A.Resize(side, side),
        A.Normalize(mean=channel_mean, std=channel_std, max_pixel_value=255.0, always_apply=False, p=1.0),
        ToTensorV2(),
    ]
    pipeline = A.Compose(steps)

    dataset = CustomDataset(train['img_path'].values, train_labels, pipeline)
    return DataLoader(dataset, batch_size=batch_size, shuffle=True, num_workers=0)

def build_network():
    """Instantiate ``BaseModel`` with its defaults and move it to ``device``."""
    return BaseModel().to(device)

def build_optimizer(network, optimizer, learning_rate):
    """Create the optimizer named by ``optimizer`` for ``network``.

    Args:
        network: Module whose parameters are optimised.
        optimizer: Either ``"sgd"`` (SGD with momentum 0.9) or ``"adam"``.
        learning_rate: Learning rate passed to the optimizer.

    Returns:
        The constructed ``torch.optim`` optimizer.

    Raises:
        ValueError: If ``optimizer`` is not one of the supported names.
    """
    params = network.parameters()
    if optimizer == "sgd":
        return optim.SGD(params, lr=learning_rate, momentum=0.9)
    if optimizer == "adam":
        return optim.Adam(params, lr=learning_rate)
    # Previously an unknown name fell through and the name string itself was
    # returned, which only failed later when the training loop used it.
    raise ValueError(
        f"Unsupported optimizer {optimizer!r}; expected 'sgd' or 'adam'."
    )

def train_epoch(network, loader, optimizer):
    """Train ``network`` for one pass over ``loader``.

    Each batch loss is logged to W&B under ``"batch loss"``.

    Args:
        network: Model whose outputs are fed to ``nn.BCELoss``.
        loader: Iterable yielding ``(imgs, labels)`` batches.
        optimizer: Optimizer stepping the network's parameters.

    Returns:
        Mean of the per-batch losses, or NaN when ``loader`` yields no batches.
    """
    criterion = nn.BCELoss().to(device)
    # Put the module in training mode in case an earlier call left it in eval mode.
    network.train()

    batch_losses = []
    # Passing the loader itself (not iter(loader)) lets tqdm read its length
    # and show a total on the progress bar.
    for imgs, labels in tqdm(loader):
        imgs = imgs.float().to(device)
        labels = labels.to(device)

        optimizer.zero_grad()

        output = network(imgs)
        loss = criterion(output, labels)

        loss.backward()
        optimizer.step()

        batch_loss = loss.item()
        batch_losses.append(batch_loss)
        wandb.log({"batch loss": batch_loss})

    # An empty loader used to raise UnboundLocalError at the return statement.
    if not batch_losses:
        return float("nan")
    # The mean is computed once here rather than after every batch.
    return np.mean(batch_losses)

In [None]:
# Start an agent that pulls configurations from the sweep and calls train_wow
# for each one, up to 100 runs.
wandb.agent(sweep_id, train_wow, count=100)

[34m[1mwandb[0m: Agent Starting Run: h52dpvja with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	learning_rate: 0.07244602619561315
[34m[1mwandb[0m: 	optimizer: adam
ERROR:wandb.jupyter:Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mugiugi[0m ([33mdcv_block-[0m). Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Ctrl + C detected. Stopping sweep.
