## 데이터 불러오기

In [None]:
import pickle
from torch import from_numpy
from torch.utils import data

# get the data
def unpickle(file):
    """Load and return the object stored in the pickle file at ``file``.

    The file is opened in binary mode and unpickled with
    ``encoding='bytes'``, so 8-bit strings written by Python 2 are returned
    as ``bytes`` objects.

    Args:
        file: Path of the pickle file to read.

    Returns:
        The unpickled object.
    """
    # NOTE: unpickling can execute arbitrary code; only call this on files
    # from a trusted source.
    with open(file, 'rb') as fo:
        # Return directly rather than binding the result to a local named
        # ``dict``, which shadowed the builtin.
        return pickle.load(fo, encoding='bytes')

# Directory that holds the CIFAR-100 pickle files ("train", "test", "meta").
rel_path = "/content/drive/MyDrive/Colab Notebooks/CIFAR-100/cifar-100-python/"

# Unpickle each file; rel_path already ends with a slash, so the file name
# is appended directly.
train_row_data = unpickle(f"{rel_path}train")
test_row_data = unpickle(f"{rel_path}test")
meta_row_data = unpickle(f"{rel_path}meta")

class CustomDataset:
    """Pair an indexable collection of images with a parallel one of labels.

    Implements ``__len__`` and ``__getitem__`` so that instances can be
    passed as the ``dataset`` argument of ``data.DataLoader``, as done
    elsewhere in this file.
    """

    def __init__(self, images, labels):
        """Keep references to ``images`` and ``labels``; nothing is copied or validated."""
        self.images, self.labels = images, labels

    def __len__(self):
        """Return the number of labels, which is used as the dataset size."""
        size = len(self.labels)
        return size

    def __getitem__(self, ind):
        """Return the ``(image, label)`` tuple stored at position ``ind``."""
        image = self.images[ind]
        label = self.labels[ind]
        return image, label

# Reshape each flat row into a 3x32x32 array. The sample count is inferred
# with -1 instead of being hard-coded (50000 / 10000), so the same code works
# for any number of rows.
# NOTE(review): the values are only cast to float, not rescaled or
# normalised; ImageNet-pretrained torchvision models are usually fed
# normalised inputs -- confirm whether that is intended here.
train_data = train_row_data[b'data'].reshape(-1, 3, 32, 32)
train_data = from_numpy(train_data).float()

test_data = test_row_data[b'data'].reshape(-1, 3, 32, 32)
test_data = from_numpy(test_data).float()

# Pair the image tensors with the fine-grained class labels.
train_dataset = CustomDataset(train_data, train_row_data[b'fine_labels'])
test_dataset = CustomDataset(test_data, test_row_data[b'fine_labels'])

print("input data shape", train_data[0].shape)
print("train dataset size", len(train_dataset))
print("test data size", len(test_dataset))

# Data loaders: wrap the datasets in a form that is convenient to use with
# PyTorch. Only the training loader shuffles.
batch_size = 32
train_loader = data.DataLoader(
    dataset=train_dataset,
    batch_size=batch_size,
    shuffle=True,
)
test_loader = data.DataLoader(
    dataset=test_dataset,
    batch_size=batch_size,
    shuffle=False,
)

input data shape torch.Size([3, 32, 32])
train dataset size 50000
test data size 10000


## 모델

In [None]:
from torch import nn, optim, cuda
from torchvision import models

# Use the GPU ("cuda") for tensor math when one is available, because it is
# faster than the CPU for matrix operations; otherwise fall back to the CPU.
device = "cuda" if cuda.is_available() else "cpu"
# Report whether the CPU or the GPU (CUDA is NVIDIA's) is being used.
print(f"Training CIFAR on '{device}'\n{'='*44}")

# NOTE(review): newer torchvision releases deprecate `pretrained=True` in
# favour of the `weights=` argument; kept as-is for compatibility -- confirm
# against the installed torchvision version.
model = models.efficientnet_b0(pretrained = True)
# Replace the last classifier layer with a 100-output linear layer. The input
# width is read from the layer being replaced instead of hard-coding 1280.
model.classifier[1] = nn.Linear(
    in_features=model.classifier[1].in_features,
    out_features=100,
    bias=True,
)
model.to(device)

lossfunction = nn.CrossEntropyLoss()
optimiser = optim.SGD(model.parameters(), lr = 0.01, momentum = 0.5) # stochastic gradient descent


Training CIFAR on 'cuda'


## model summary

In [None]:
# from torchsummary import summary

# summary(model, (3, 32, 32))
# print(model)

## train & test

In [None]:
import datetime
from torch import save
import os

# Root folder under which this run's files are saved.
path = "/content/drive/MyDrive/Colab Notebooks/CIFAR-100/"

# Timestamp (to the second) used to give each run its own weight directory.
serial = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
# Join the components instead of concatenating strings: `path` already ends
# with "/", so `path + "/backup/..."` produced a doubled slash.
weight_path = os.path.join(path, "backup", f"weight_{serial}")
os.makedirs(weight_path)

def train(epoch):
    """Run one pass over ``train_loader``, updating ``model`` via ``optimiser``.

    For every batch: move it to ``device``, clear the gradients, compute
    ``lossfunction`` on the model output, backpropagate and take an optimiser
    step. A progress line with the current batch loss is printed every
    100 batches.

    Args:
        epoch: Epoch number; used only in the progress message.
    """
    # Put the module in training mode (test() switches it to eval mode).
    model.train()
    total_samples = len(train_loader.dataset)
    total_batches = len(train_loader)

    for step, (inputs, labels) in enumerate(train_loader):
        inputs = inputs.to(device)
        labels = labels.to(device)

        optimiser.zero_grad()  # reset the gradients to zero
        batch_loss = lossfunction(model(inputs), labels)
        batch_loss.backward()
        optimiser.step()

        # Only report on every 100th batch.
        if step % 100 != 0:
            continue
        print('Train Epoch : {} | Batch Status : {}/{} ({:.0f}%) | Loss : {:.6f}'.format(
            epoch,
            step * len(inputs), total_samples, 100. * step / total_batches,
            batch_loss.item()
            ))

def test(epoch):
    """Evaluate ``model`` on ``test_loader`` and print average loss and accuracy.

    Args:
        epoch: Not used by the evaluation itself; kept so the call signature
            matches ``train``.
    """
    # Local import: gradient tracking is disabled only in this function.
    from torch import no_grad

    # Put the module in evaluation mode (train() switches it back).
    model.eval()
    total_samples = len(test_loader.dataset)
    test_loss = 0.0
    correct = 0

    # Evaluation needs no gradients; skipping autograd bookkeeping saves
    # memory and time.
    with no_grad():
        for data, target in test_loader:
            # Move the tensors to the selected device so the computation
            # runs there.
            data, target = data.to(device), target.to(device)
            output = model(data)

            # nn.CrossEntropyLoss (default reduction) returns the mean over
            # the batch, so weight it by the batch size; dividing the sum by
            # the number of samples below then gives a true per-sample mean.
            test_loss += lossfunction(output, target).item() * target.size(0)

            # Index of the largest output along dim 1 is the predicted class.
            pred = output.max(1, keepdim=True)[1]
            # .item() keeps `correct` a plain int so it prints as a number
            # rather than as "tensor(...)".
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= total_samples
    print("="*44)
    print(f"Test set: Average loss : {test_loss:.4f}, Accuracy : {correct}/{total_samples}({100. * correct / total_samples:.0f}%)")


print(f"save files under {path}")

# Total number of epochs, kept separate from the loop variable.
num_epochs = 30
for epoch in range(1, num_epochs + 1):
    train(epoch)
    # Save the weights after every epoch, before evaluating; the file is
    # named "<epoch> <batch_size>" inside weight_path.
    checkpoint_file = f"{weight_path}/{epoch} {batch_size}"
    save(model.state_dict(), checkpoint_file)
    test(epoch)

save files under /content/drive/MyDrive/Colab Notebooks/CIFAR-100/
Train Epoch : 1 | Batch Status : 0/50000 (0%) | Loss : 4.830959
Train Epoch : 1 | Batch Status : 3200/50000 (6%) | Loss : 4.447664
Train Epoch : 1 | Batch Status : 6400/50000 (13%) | Loss : 4.354732
Train Epoch : 1 | Batch Status : 9600/50000 (19%) | Loss : 3.973842
Train Epoch : 1 | Batch Status : 12800/50000 (26%) | Loss : 4.259941
Train Epoch : 1 | Batch Status : 16000/50000 (32%) | Loss : 3.646838
Train Epoch : 1 | Batch Status : 19200/50000 (38%) | Loss : 3.462451
Train Epoch : 1 | Batch Status : 22400/50000 (45%) | Loss : 3.655944
Train Epoch : 1 | Batch Status : 25600/50000 (51%) | Loss : 3.290753
Train Epoch : 1 | Batch Status : 28800/50000 (58%) | Loss : 3.145368
Train Epoch : 1 | Batch Status : 32000/50000 (64%) | Loss : 3.258646
Train Epoch : 1 | Batch Status : 35200/50000 (70%) | Loss : 3.043272
Train Epoch : 1 | Batch Status : 38400/50000 (77%) | Loss : 3.146784
Train Epoch : 1 | Batch Status : 41600/50000 