In [1]:
import torch
import torchvision
from torch import nn
import torch.optim as optim
from torch.utils.data import DataLoader, random_split
from torch.utils.data import Dataset
from torchvision import datasets,transforms, models
from torchvision.transforms import ToTensor
import os
import pandas as pd
from torchvision.io import read_image
from PIL import Image

In [2]:
# Get cpu, gpu or mps device for training.
device = (
    "cuda"
    if torch.cuda.is_available()
    else "mps"
    if torch.backends.mps.is_available()
    else "cpu"
)
print(f"Using {device} device")

print(torch.cuda.is_available())
print(torch.cuda.device_count())  # 1 이상의 숫자가 출력되어야 합니다.
print(torch.cuda.get_device_name(0))  # GPU 이름이 출력되어야 합니다.
print(torch.version.cuda)
print(torch.__version__)

Using cuda device
True
1
NVIDIA GeForce GTX 1660 SUPER
12.1
2.1.2+cu121


In [3]:
# 데이터 폴더 경로 설정
data_folder = "cifar10" 

In [80]:
class CustomDataset(Dataset):
    def __init__(self, root_dir, transform=None, target_transform=None):
        self.root_dir = root_dir
        self.transform = transform
        self.target_transform = target_transform
        
        # 데이터프레임 생성
        df = pd.DataFrame({'img_id': [f'{i:05d}' for i in range(50000)]})
        df['label_path'] = df['img_id'].apply(lambda x: os.path.join('cifar10/train', f'{x}.txt'))

        # 레이블 파일에서 레이블을 읽어와 데이터프레임에 추가
        df['label'] = df['label_path'].apply(lambda path: int(open(path, 'r').readline().strip()) if os.path.exists(path) else None)

        # 레이블이 있는 데이터만 사용
        df = df.dropna()

        self.df = df

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        img_id = self.df.iloc[idx]['img_id']
        img_path = os.path.join(self.root_dir, f'{img_id}.png')  # 이미지 파일 경로       
        label = self.df.iloc[idx]['label']  # 해당 이미지에 대한 레이블
        
        image = Image.open(img_path).convert('RGB')
        
        # 좌우 반전
        flipped_image = image.transpose(Image.FLIP_LEFT_RIGHT)
        
        # 새로운 파일명 생성
        new_img_id = f'{idx + 50000:05d}'
        new_img_path = os.path.join(self.root_dir, f'{new_img_id}.png')
        new_label_path = os.path.join('cifar10/train', f'{new_img_id}.txt')
        
        # 저장
        flipped_image.save(new_img_path)
        with open(new_label_path, 'w') as label_file:
            label_file.write(str(label)) # 해당 이미지의 원본 레이블 값을 새로운 레이블 파일에 저장
        
        if self.transform:
            image = self.transform(image)
            
        if self.target_transform:
            label = self.target_transform(label)
            
        return image, label

    
# 클래스 정의
classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

batch_size = 64

transform_train = transforms.Compose([
    transforms.RandomHorizontalFlip(p=1.0),  # 모든 이미지에 대해 좌우 반전을 적용
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
# torchvision 데이터셋의 출력(output)은 [0, 1] 범위를 갖는 PILImage 이미지
# 이를 [-1, 1]의 범위로 정규화된 Tensor로 변환 (공식 문서)


dataset = CustomDataset(root_dir='cifar10/train', transform=transform_train)

# 데이터로더 생성
dataloader = torch.utils.data.DataLoader(dataset, batch_size=64, shuffle=True, num_workers=0)

print(f"Number of training samples: {len(dataset)}")


#################################

class CustomDataset_test(Dataset):
    def __init__(self, root_dir, transform=None, target_transform=None):
        self.root_dir = root_dir
        self.transform = transform
        self.target_transform = target_transform
        
       # 데이터프레임 생성
        df = pd.DataFrame({'img_id': [f'{i:05d}' for i in range(10000)]})
        df['label_path'] = df['img_id'].apply(lambda x: os.path.join('cifar10/test', f'{x}.txt'))

        # 레이블 파일에서 레이블을 읽어와 데이터프레임에 추가
        df['label'] = df['label_path'].apply(lambda path: int(open(path, 'r').readline().strip()) if os.path.exists(path) else None)

        # 레이블이 있는 데이터만 사용
        df = df.dropna()

        self.df = df

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        img_id = self.df.iloc[idx]['img_id']
        img_path = os.path.join(self.root_dir, f'{img_id}.png')  # 이미지 파일 경로       
        label = self.df.iloc[idx]['label']  # 해당 이미지에 대한 레이블

        image = Image.open(img_path).convert('RGB')
        
        if self.transform:
            image = self.transform(image)
            
        if self.target_transform:
            label = self.target_transform(label)
            
        return image, label

 
 
test_dataset = CustomDataset_test(root_dir='cifar10/test', transform=transform)

# 데이터로더 생성
test_dataloader = torch.utils.data.DataLoader(test_dataset, batch_size=64, shuffle=True, num_workers=0)

print(f"Number of test samples: {len(test_dataset)}")

Number of training samples: 50000
Number of test samples: 10000


pip install torch==2.1.2+cu121 torchvision==0.16.2+cu121 torchaudio==2.1.2+cu121 -f https://download.pytorch.org/whl/cu121/torch_stable.html

In [69]:
import torch.nn.functional as F

# BasicBlock 정의
class BasicBlock(nn.Module):
    expansion = 1

    def __init__(self, in_channels, out_channels, stride=1):
        super(BasicBlock, self).__init__()
        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(out_channels)

        self.shortcut = nn.Sequential()
        if stride != 1 or in_channels != self.expansion * out_channels:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_channels, self.expansion * out_channels, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(self.expansion * out_channels)
            )

    def forward(self, x):
        residual = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)

        out += self.shortcut(residual)
        out = self.relu(out)

        return out
    
 # ResNet 정의
class ResNet(nn.Module):
    def __init__(self, block, num_blocks, num_classes=10):
        super(ResNet, self).__init__()
        self.in_channels = 64

        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)

        self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1)
        self.dropout1 = nn.Dropout(0.5)
        self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2)
        self.dropout2 = nn.Dropout(0.5)
        self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2)
        self.dropout3 = nn.Dropout(0.5)
        self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2)

        self.avg_pool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(512 * block.expansion, num_classes)

    def _make_layer(self, block, out_channels, num_blocks, stride):
        strides = [stride] + [1] * (num_blocks - 1)
        layers = []
        for stride in strides:
            layers.append(block(self.in_channels, out_channels, stride))
            self.in_channels = out_channels * block.expansion
        return nn.Sequential(*layers)

    def forward(self, x):
        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.layer1(out)
        out = self.dropout1(out)
        out = self.layer2(out)
        out = self.dropout2(out)
        out = self.layer3(out)
        out = self.dropout3(out)
        out = self.layer4(out)

        out = self.avg_pool(out)
        out = out.view(out.size(0), -1)
        out = self.fc(out)
        
        out = F.softmax(out, dim=1)

        return out



In [70]:
# Bottleneck 블록 정의
class Bottleneck(nn.Module):
    expansion = 4

    def __init__(self, in_channels, out_channels, stride=1):
        super(Bottleneck, self).__init__()
        
        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(out_channels)
        
        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(out_channels)
        
        self.conv3 = nn.Conv2d(out_channels, out_channels * self.expansion, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(out_channels * self.expansion)

        self.shortcut = nn.Sequential()
        
        if stride != 1 or in_channels != out_channels * self.expansion:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_channels, out_channels * self.expansion, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_channels * self.expansion)
            )

    def forward(self, x):
        residual = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = F.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)
        out = F.relu(out)

        out = self.conv3(out)
        out = self.bn3(out)

        out += self.shortcut(residual)
        out = F.relu(out)

        return out

In [64]:
# 수정된 ResNet 클래스 정의
class ResNet50(nn.Module):
    def __init__(self, block, layers, num_classes=10):
        super(ResNet50, self).__init__()
        self.in_channels = 64

        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        self.layer1 = self._make_layer(block, 64, layers[0], stride=1)
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
        self.layer4 = self._make_layer(block, 512, layers[3], stride=2)

        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(512 * block.expansion, num_classes)

    def _make_layer(self, block, out_channels, blocks, stride):
        strides = [stride] + [1] * (blocks - 1)
        layers = []
        for stride in strides:
            layers.append(block(self.in_channels, out_channels, stride))
            self.in_channels = out_channels * block.expansion
        return nn.Sequential(*layers)

    def forward(self, x):
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)

        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)


        x = self.avgpool(x)
        x = x.view(x.size(0), -1)
        x = self.fc(x)



        return x

In [73]:
# ResNet50 모델 인스턴스 생성
resnet50 = ResNet50(Bottleneck, [3, 4, 6, 3], num_classes=10).to(device)

# 손실 함수 및 최적화기 설정
criterion = nn.CrossEntropyLoss()
#optimizer = optim.SGD(resnet50.parameters(), lr=0.001, momentum=0.9)
optimizer = optim.RMSprop(resnet50.parameters(), lr=0.001, alpha=0.9, weight_decay=0.01)
#optimizer = optim.Adam(resnet152.parameters(), lr=0.001)

# 훈련 함수 정의
def train(resnet50, dataloader, criterion, optimizer, device):
    resnet50.train()
    running_loss = 0.0

    for inputs, labels in dataloader:
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad()

        outputs = resnet50(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    return running_loss / len(dataloader)

# 테스트 함수 정의
def test(resnet50, test_dataloader, criterion, device):
    resnet50.eval()
    correct_predictions = 0
    total_samples = 0

    with torch.no_grad():
        for inputs, labels in test_dataloader:
            inputs, labels = inputs.to(device), labels.to(device)

            outputs = resnet50(inputs)
            _, predicted = torch.max(outputs, 1)
            correct_predictions += (predicted == labels).sum().item()
            total_samples += labels.size(0)

    accuracy = correct_predictions / total_samples * 100
    return accuracy

In [74]:
print(resnet50)

ResNet50(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (shortcut): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (1): Batc

In [77]:
# 훈련 및 테스트
num_epochs = 90

for epoch in range(num_epochs):
    train_loss = train(resnet50, dataloader, criterion, optimizer, device)
    test_accuracy = test(resnet50, test_dataloader, criterion, device)
    

    print(f'Epoch [{epoch+1}/{num_epochs}], Train Loss: {train_loss:.4f}, Test Accuracy: {test_accuracy:.2f}%')

# 최종 모델의 정확도 출력
final_accuracy = test(resnet50, test_dataloader, criterion, device)
print(f'Final Test Accuracy: {final_accuracy:.2f}%')

Epoch [1/90], Train Loss: 2.0231, Test Accuracy: 22.65%
Epoch [2/90], Train Loss: 1.6182, Test Accuracy: 39.76%
Epoch [3/90], Train Loss: 1.4150, Test Accuracy: 35.60%
Epoch [4/90], Train Loss: 1.2786, Test Accuracy: 49.35%
Epoch [5/90], Train Loss: 1.1956, Test Accuracy: 37.90%
Epoch [6/90], Train Loss: 1.1391, Test Accuracy: 35.17%
Epoch [7/90], Train Loss: 1.1011, Test Accuracy: 57.48%
Epoch [8/90], Train Loss: 1.0722, Test Accuracy: 56.41%
Epoch [9/90], Train Loss: 1.0526, Test Accuracy: 50.71%
Epoch [10/90], Train Loss: 1.0294, Test Accuracy: 50.26%
Epoch [11/90], Train Loss: 1.0120, Test Accuracy: 53.68%
Epoch [12/90], Train Loss: 1.0021, Test Accuracy: 55.97%
Epoch [13/90], Train Loss: 0.9892, Test Accuracy: 58.35%


KeyboardInterrupt: 

---

---

In [61]:
correct = 0
total = 0

# 학습 중이 아니므로, 출력에 대한 변화도를 계산할 필요가 없습니다
with torch.no_grad():
    for data in test_dataloader:
        images, labels = data
        images, labels = images.to(device), labels.to(device)
        
        # 신경망에 이미지를 통과시켜 출력을 계산합니다
        outputs = resnet50(images)
        
        # 가장 높은 값(energy)를 갖는 분류(class)를 정답으로 선택하겠습니다
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f'Accuracy of the network on the 10000 test images: {100 * correct // total} %')

# 각 분류(class)에 대한 예측값 계산을 위해 준비
correct_pred = {classname: 0 for classname in classes}
total_pred = {classname: 0 for classname in classes}

# 변화도는 여전히 필요하지 않습니다
with torch.no_grad():
    for data in test_dataloader:
        images, labels = data
        images, labels = images.to(device), labels.to(device)
        
        outputs = resnet50(images)
        _, predictions = torch.max(outputs, 1)
        # 각 분류별로 올바른 예측 수를 모읍니다
        for label, prediction in zip(labels, predictions):
            if label == prediction:
                correct_pred[classes[label]] += 1
            total_pred[classes[label]] += 1


# 각 분류별 정확도(accuracy)를 출력합니다
for classname, correct_count in correct_pred.items():
    accuracy = 100 * float(correct_count) / total_pred[classname]
    print(f'Accuracy for class: {classname:5s} is {accuracy:.1f} %')

Accuracy of the network on the 10000 test images: 74 %
Accuracy for class: plane is 81.5 %
Accuracy for class: car   is 86.8 %
Accuracy for class: bird  is 63.8 %
Accuracy for class: cat   is 51.0 %
Accuracy for class: deer  is 67.5 %
Accuracy for class: dog   is 65.0 %
Accuracy for class: frog  is 84.2 %
Accuracy for class: horse is 80.1 %
Accuracy for class: ship  is 88.0 %
Accuracy for class: truck is 78.5 %


---