### 1. 앙상블 ① - 이미지 학습모델 3개

In [3]:
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])

train_dataset = torchvision.datasets.MNIST(root='./data', train=True, transform=transform, download=True)
test_dataset = torchvision.datasets.MNIST(root='./data', train=False, transform=transform, download=True)

train_loader = DataLoader(dataset=train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=64, shuffle=False)


In [4]:
from torchvision.models import resnet18

# ResNet18
class ResNetModel(nn.Module):
    def __init__(self):
        super(ResNetModel, self).__init__()
        # 3채널로 미리 학습 된거 안가져옴
        self.model = resnet18(pretrained=False)
        # 채널 1이 들어가기 때문에 첫번째 레이어 바꿔버림
        self.model.conv1 = nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3, bias=False)
        # 끝단에 클래스를 10개짜리로 분류 할 수 있는 레이어로 바꿈
        self.model.fc = nn.Linear(self.model.fc.in_features, 10)

    def forward(self, x):
        return self.model(x)

resnet_model = ResNetModel().to(device)




In [5]:
from torchvision.models import densenet121

# DenseNet121 모델
class DenseNetModel(nn.Module):
    def __init__(self):
        super(DenseNetModel, self).__init__()
        self.model = densenet121(pretrained=False)
        self.model.features.conv0 = nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3, bias=False)
        self.model.classifier = nn.Linear(self.model.classifier.in_features, 10)

    def forward(self, x):
        return self.model(x)

densenet_model = DenseNetModel().to(device)


In [6]:
class CNNModel(nn.Module):
    def __init__(self):
        super(CNNModel, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.fc1 = nn.Linear(64 * 56 * 56, 128)
        self.fc2 = nn.Linear(128, 10)

    def forward(self, x):
        x = nn.ReLU()(self.conv1(x))
        x = nn.MaxPool2d(2, 2)(x)
        x = nn.ReLU()(self.conv2(x))
        x = nn.MaxPool2d(2, 2)(x)
        x = x.view(-1, 64 * 56 * 56)
        x = nn.ReLU()(self.fc1(x))
        x = self.fc2(x)
        return x

cnn_model = CNNModel().to(device)


In [7]:
import torch.optim as optim
from tqdm import tqdm

criterion = nn.CrossEntropyLoss()

def train_model(model, optimizer, train_loader, epochs=1):
    model.train()
    for epoch in range(epochs):
        running_loss = 0.0
        for inputs, labels in tqdm(train_loader):
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
        print(f"Epoch {epoch+1}, Loss: {running_loss/len(train_loader)}")

resnet_optimizer = optim.Adam(resnet_model.parameters(), lr=0.001)
densenet_optimizer = optim.Adam(densenet_model.parameters(), lr=0.001)
cnn_optimizer = optim.Adam(cnn_model.parameters(), lr=0.001)

# 순차적으로 학습.
# ResNet
train_model(resnet_model, resnet_optimizer, train_loader)

# DenseNet
train_model(densenet_model, densenet_optimizer, train_loader)

# CNN
train_model(cnn_model, cnn_optimizer, train_loader)


100%|██████████| 938/938 [03:30<00:00,  4.46it/s]


Epoch 1, Loss: 0.10419028171716428


100%|██████████| 938/938 [10:38<00:00,  1.47it/s]


Epoch 1, Loss: 0.1191907719726851


100%|██████████| 938/938 [01:59<00:00,  7.85it/s]

Epoch 1, Loss: 0.27463672904452596





In [8]:
# 3개 모델 준비.
models = [resnet_model, densenet_model, cnn_model]

correct = 0
total = 0
with torch.no_grad():
    for inputs, labels in tqdm(test_loader):
        inputs, labels = inputs.to(device), labels.to(device)

        # 이미지를 모델에 번갈아 가면서 줌 그러면 모델 3개에 대한 prediction값이 나옴
        # 모델 갯수, 배치사이즈, 10개 클래스 확률 =3, 64, 10
        outputs = [model(inputs) for model in models]

        # 걔네들을 가져와서 쌓고, z축 방향으로 mean값 처리해서 평균 확률값으로 계산
        outputs = torch.mean(torch.stack(outputs), dim=0)

        # 평균 예측 행렬 배치사이즈 X 평균 10개 클래스 확률을 max해서,
        # 배치 x 예측클래스 도출
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
print(f'Ensemble Accuracy: {100 * correct / total:.2f}%')

100%|██████████| 157/157 [00:52<00:00,  3.02it/s]

Ensemble Accuracy: 99.25%





### 실습1) 와인데이터를 통한 MLP 모델 3개 클래스 분류 앙상블

각 모델을 튜닝하여 성능을 높여보자.

In [9]:
import pandas as pd
import torch
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder, RobustScaler
from torch.utils.data import DataLoader, TensorDataset

data = pd.read_csv('wine.csv')

In [10]:
X = data.drop(columns=['class']).values
y = data[['class']].values

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

scaler = RobustScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

X_train = torch.tensor(X_train, dtype=torch.float32)
y_train = torch.tensor(y_train, dtype=torch.float32)
X_test = torch.tensor(X_test, dtype=torch.float32)
y_test = torch.tensor(y_test, dtype=torch.float32)

train_dataset = TensorDataset(X_train, y_train)
test_dataset = TensorDataset(X_test, y_test)

train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)


In [11]:
import torch.nn as nn

class Model1(nn.Module):
    def __init__(self):
        super(Model1, self).__init__()
        self.fc1 = nn.Linear(12, 16)
        self.fc2 = nn.Linear(16, 16)
        self.fc3 = nn.Linear(16, 1)

    def forward(self, x):
        x = nn.ReLU()(self.fc1(x))
        x = nn.ReLU()(self.fc2(x))
        x = self.fc3(x)
        return x

mlp_model1 = Model1()


In [12]:
class Model2(nn.Module):
    def __init__(self):
        super(Model2, self).__init__()
        self.fc1 = nn.Linear(12, 16)
        self.fc2 = nn.Linear(16, 16)
        self.fc3 = nn.Linear(16, 1)

    def forward(self, x):
        x = nn.ReLU()(self.fc1(x))
        x = nn.ReLU()(self.fc2(x))
        x = self.fc3(x)
        return x

mlp_model2 = Model2()


In [13]:
class Model3(nn.Module):
    def __init__(self):
        super(Model3, self).__init__()
        self.fc1 = nn.Linear(12, 16)
        self.fc2 = nn.Linear(16, 16)
        self.fc3 = nn.Linear(16, 1)

    def forward(self, x):
        x = nn.ReLU()(self.fc1(x))
        x = nn.ReLU()(self.fc2(x))
        x = self.fc3(x)
        return x

mlp_model3 = Model3()


In [14]:
import torch.optim as optim

#criterion = nn.CrossEntropyLoss()
criterion = nn.BCEWithLogitsLoss()

optimizer1 = optim.SGD(mlp_model1.parameters(), lr=0.01)
optimizer2 = optim.SGD(mlp_model2.parameters(), lr=0.01)
optimizer3 = optim.SGD(mlp_model3.parameters(), lr=0.01)


In [15]:
epochs = 5
for epoch in range(epochs):
    running_loss = 0.0
    for inputs, labels in train_loader:
        optimizer1.zero_grad()
        outputs = mlp_model1(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer1.step()
        running_loss += loss.item()
    print(f"MLP Model 1, Epoch {epoch+1}, Loss: {running_loss/len(train_loader)}")


MLP Model 1, Epoch 1, Loss: 0.6210300035592986
MLP Model 1, Epoch 2, Loss: 0.5691991971760262
MLP Model 1, Epoch 3, Loss: 0.5206249130935203
MLP Model 1, Epoch 4, Loss: 0.4678667209497312
MLP Model 1, Epoch 5, Loss: 0.40986978389867923


In [16]:
for epoch in range(epochs):
    running_loss = 0.0
    for inputs, labels in train_loader:
        optimizer2.zero_grad()
        outputs = mlp_model2(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer2.step()
        running_loss += loss.item()
    print(f"MLP Model 2, Epoch {epoch+1}, Loss: {running_loss/len(train_loader)}")


MLP Model 2, Epoch 1, Loss: 0.7102951429239134
MLP Model 2, Epoch 2, Loss: 0.6356095707998043
MLP Model 2, Epoch 3, Loss: 0.5813479278145767
MLP Model 2, Epoch 4, Loss: 0.5367427915334702
MLP Model 2, Epoch 5, Loss: 0.4933891354537592


In [17]:
for epoch in range(epochs):
    running_loss = 0.0
    for inputs, labels in train_loader:
        optimizer3.zero_grad()
        outputs = mlp_model3(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer3.step()
        running_loss += loss.item()
    print(f"MLP Model 3, Epoch {epoch+1}, Loss: {running_loss/len(train_loader)}")


MLP Model 3, Epoch 1, Loss: 0.7432344017959223
MLP Model 3, Epoch 2, Loss: 0.6408882039349254
MLP Model 3, Epoch 3, Loss: 0.5676129747454713
MLP Model 3, Epoch 4, Loss: 0.49649881517014854
MLP Model 3, Epoch 5, Loss: 0.41635343105327793


In [18]:
import torch.nn.functional as F

models = [mlp_model1, mlp_model2, mlp_model3]

correct = 0
total = 0

with torch.no_grad():
    for inputs, labels in test_loader:
        outputs = [model(inputs) for model in models]
        outputs = torch.mean(torch.stack(outputs), dim=0)
        outputs = torch.sigmoid(outputs)
        predicted = (outputs >= 0.5).long().squeeze()
        labels = labels.view_as(predicted)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

accuracy = 100 * correct / total
print(f'Ensemble Accuracy: {accuracy:.2f}%')


Ensemble Accuracy: 78.92%


### 실습2) 와인데이터를 통한 MLP 모델 3개 퀄리티 분류 앙상블
10진 분류

각 모델을 튜닝하여 성능을 높여보자.

In [19]:
import pandas as pd
import torch
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder, RobustScaler
from torch.utils.data import DataLoader, TensorDataset

data = pd.read_csv('wine.csv')

data['quality'] = LabelEncoder().fit_transform(data['quality'])

In [20]:
X = data.drop(columns=['quality']).values
y = data['quality'].values

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

scaler = RobustScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

X_train = torch.tensor(X_train, dtype=torch.float32)
y_train = torch.tensor(y_train, dtype=torch.long)
X_test = torch.tensor(X_test, dtype=torch.float32)
y_test = torch.tensor(y_test, dtype=torch.long)

train_dataset = TensorDataset(X_train, y_train)
test_dataset = TensorDataset(X_test, y_test)

train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)


In [21]:
import torch.nn as nn

class Model1(nn.Module):
    def __init__(self):
        super(Model1, self).__init__()
        self.fc1 = nn.Linear(12, 16)
        self.fc2 = nn.Linear(16, 16)
        self.fc3 = nn.Linear(16, 7)

    def forward(self, x):
        x = nn.ReLU()(self.fc1(x))
        x = nn.ReLU()(self.fc2(x))
        x = self.fc3(x)
        return x

mlp_model1 = Model1()


In [22]:
class Model2(nn.Module):
    def __init__(self):
        super(Model2, self).__init__()
        self.fc1 = nn.Linear(12, 16)
        self.fc2 = nn.Linear(16, 16)
        self.fc3 = nn.Linear(16, 7)

    def forward(self, x):
        x = nn.ReLU()(self.fc1(x))
        x = nn.ReLU()(self.fc2(x))
        x = self.fc3(x)
        return x

mlp_model2 = Model2()


In [23]:
class Model3(nn.Module):
    def __init__(self):
        super(Model3, self).__init__()
        self.fc1 = nn.Linear(12, 16)
        self.fc2 = nn.Linear(16, 16)
        self.fc3 = nn.Linear(16, 7)

    def forward(self, x):
        x = nn.ReLU()(self.fc1(x))
        x = nn.ReLU()(self.fc2(x))
        x = self.fc3(x)
        return x

mlp_model3 = Model3()


In [24]:
import torch.optim as optim

criterion = nn.CrossEntropyLoss()

optimizer1 = optim.SGD(mlp_model1.parameters(), lr=0.01)
optimizer2 = optim.SGD(mlp_model2.parameters(), lr=0.01)
optimizer3 = optim.SGD(mlp_model3.parameters(), lr=0.01)


In [25]:
epochs = 5
for epoch in range(epochs):
    running_loss = 0.0
    for inputs, labels in train_loader:
        optimizer1.zero_grad()
        outputs = mlp_model1(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer1.step()
        running_loss += loss.item()
    print(f"MLP Model 1, Epoch {epoch+1}, Loss: {running_loss/len(train_loader)}")


MLP Model 1, Epoch 1, Loss: 1.9167938552251675
MLP Model 1, Epoch 2, Loss: 1.748979613548372
MLP Model 1, Epoch 3, Loss: 1.6129030294534636
MLP Model 1, Epoch 4, Loss: 1.5007749708687388
MLP Model 1, Epoch 5, Loss: 1.417306241465778


In [26]:
for epoch in range(epochs):
    running_loss = 0.0
    for inputs, labels in train_loader:
        optimizer2.zero_grad()
        outputs = mlp_model2(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer2.step()
        running_loss += loss.item()
    print(f"MLP Model 2, Epoch {epoch+1}, Loss: {running_loss/len(train_loader)}")


MLP Model 2, Epoch 1, Loss: 1.9012535795932863
MLP Model 2, Epoch 2, Loss: 1.697943347256358
MLP Model 2, Epoch 3, Loss: 1.5384097724426082
MLP Model 2, Epoch 4, Loss: 1.436174023442152
MLP Model 2, Epoch 5, Loss: 1.3757931037646969


In [27]:
for epoch in range(epochs):
    running_loss = 0.0
    for inputs, labels in train_loader:
        optimizer3.zero_grad()
        outputs = mlp_model3(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer3.step()
        running_loss += loss.item()
    print(f"MLP Model 3, Epoch {epoch+1}, Loss: {running_loss/len(train_loader)}")


MLP Model 3, Epoch 1, Loss: 1.7896134896976192
MLP Model 3, Epoch 2, Loss: 1.606999965702615
MLP Model 3, Epoch 3, Loss: 1.4687375208226645
MLP Model 3, Epoch 4, Loss: 1.3843883217834845
MLP Model 3, Epoch 5, Loss: 1.3337490878454068


In [28]:
models = [mlp_model1, mlp_model2, mlp_model3]

correct = 0
total = 0
with torch.no_grad():
    for inputs, labels in test_loader:
        outputs = [model(inputs) for model in models]
        outputs = torch.mean(torch.stack(outputs), dim=0)
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
accuracy = 100 * correct / total
print(f'Ensemble Accuracy: {accuracy:.2f}%')

Ensemble Accuracy: 47.46%


### 2. 멀티모달 학습

1. MNIST 이미지와 벡터화된 MNIST 이미지를 CNN과 MLP로 특징 추출
2. 그 후, 두가지 피처벡터를 합친다음에 FC로 최종 아웃풋 내는 모델에 전달

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
from tqdm import tqdm

batch_size = 64
learning_rate = 0.001
num_epochs = 10

transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])

train_dataset = datasets.MNIST(root='./data', train=True, download=True, transform=transform)
test_dataset = datasets.MNIST(root='./data', train=False, download=True, transform=transform)

train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)

# CNN
class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        self.conv1 = nn.Conv2d(1, 16, 3, padding=1)
        self.conv2 = nn.Conv2d(16, 32, 3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        self.fc1 = nn.Linear(32 * 7 * 7, 128)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = x.view(-1, 32 * 7 * 7)
        x = F.relu(self.fc1(x))
        return x

# MLP
class MLP(nn.Module):
    def __init__(self, input_size, hidden_size):
        super(MLP, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)

    def forward(self, x):
        x = F.relu(self.fc1(x))
        return x

# 멀티모달모델
class MultimodalModel(nn.Module):
    def __init__(self, cnn_model, mlp_model, combined_hidden_size, num_classes):
        super(MultimodalModel, self).__init__()
        self.cnn = cnn_model  # 위에 정의한 CNN모델 인스턴스 들어올것임. 아웃풋 128
        self.mlp = mlp_model  # 위에 정의한 MLP모델 인스턴스 들어올것임. 아웃풋 128

                              # cnn_model과 mlp_model이 내뱉은 두개의 아웃풋 피처 벡터를
                              # 병합해서 넣어줘야 하니까 128 + 128 인풋사이즈를 가져야한다.
                              # cobined_hidden_size는 그냥 사용자가 설정하는 멀티모달 fc히든사이즈
        self.fc_combined = nn.Linear(128 + 128, combined_hidden_size)
                              # 아웃풋 레이어 정의, 클래스 갯수만큼 출력을 정의
        self.fc_out = nn.Linear(combined_hidden_size, num_classes)

    def forward(self, image, flat_image):
        image_features = self.cnn(image) # CNN으로 이미지 피처 추출
        flat_image_features = self.mlp(flat_image) # mlp로 정형 피처 추출

                          # 그 두 피처를 가로 방향으로 concatenate함
        combined = torch.cat((image_features, flat_image_features), dim=1)

                         # 합친 피처를 은닉층에 넣고
        x = F.relu(self.fc_combined(combined))
                         # 마지막 레이어를 거쳐서 최종 아웃풋을 냄
        x = self.fc_out(x)
        return x

cnn_model = CNN()
mlp_model = MLP(input_size=784, hidden_size=128)  # 28x28 이미지를 펼친 후 784차원 입력
multimodal_model = MultimodalModel(cnn_model, mlp_model, combined_hidden_size=128, num_classes=10)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(multimodal_model.parameters(), lr=learning_rate)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
multimodal_model = multimodal_model.to(device)

for epoch in range(num_epochs):
    multimodal_model.train()
    running_loss = 0.0

    for images, labels in tqdm(train_loader):
        images = images.to(device)
        labels = labels.to(device)

        # 이미지를 펼쳐서 정형 데이터로 변환
        flat_images = images.view(images.size(0), -1).to(device)

        optimizer.zero_grad()

        outputs = multimodal_model(images, flat_images)
        loss = criterion(outputs, labels)

        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss/len(train_loader):.4f}')


    multimodal_model.eval()
    correct = 0
    total = 0

    with torch.no_grad():
        for images, labels in tqdm(test_loader):
            images = images.to(device)
            labels = labels.to(device)

            # 이미지를 펼쳐서 정형 데이터로 변환
            flat_images = images.view(images.size(0), -1).to(device)

            outputs = multimodal_model(images, flat_images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    accuracy = 100 * correct / total
    print(f'Validation Accuracy: {accuracy:.2f}%')


Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz to ./data/MNIST/raw/train-images-idx3-ubyte.gz


100%|██████████| 9912422/9912422 [00:01<00:00, 6613438.41it/s]


Extracting ./data/MNIST/raw/train-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-labels-idx1-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-labels-idx1-ubyte.gz to ./data/MNIST/raw/train-labels-idx1-ubyte.gz


100%|██████████| 28881/28881 [00:00<00:00, 491789.79it/s]


Extracting ./data/MNIST/raw/train-labels-idx1-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw/t10k-images-idx3-ubyte.gz


100%|██████████| 1648877/1648877 [00:00<00:00, 4467863.88it/s]


Extracting ./data/MNIST/raw/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz


100%|██████████| 4542/4542 [00:00<00:00, 4347450.65it/s]


Extracting ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw



100%|██████████| 938/938 [00:24<00:00, 38.74it/s]


Epoch [1/10], Loss: 0.1991


100%|██████████| 157/157 [00:02<00:00, 63.88it/s]


Validation Accuracy: 97.95%


100%|██████████| 938/938 [00:18<00:00, 49.43it/s]


Epoch [2/10], Loss: 0.0518


100%|██████████| 157/157 [00:03<00:00, 39.95it/s]


Validation Accuracy: 98.43%


100%|██████████| 938/938 [00:22<00:00, 41.29it/s]


Epoch [3/10], Loss: 0.0369


100%|██████████| 157/157 [00:02<00:00, 55.74it/s]


Validation Accuracy: 98.94%


100%|██████████| 938/938 [00:17<00:00, 53.21it/s]


Epoch [4/10], Loss: 0.0287


100%|██████████| 157/157 [00:02<00:00, 54.42it/s]


Validation Accuracy: 98.75%


100%|██████████| 938/938 [00:16<00:00, 56.04it/s]


Epoch [5/10], Loss: 0.0216


100%|██████████| 157/157 [00:02<00:00, 68.02it/s]


Validation Accuracy: 98.81%


100%|██████████| 938/938 [00:17<00:00, 54.22it/s]


Epoch [6/10], Loss: 0.0180


100%|██████████| 157/157 [00:02<00:00, 54.10it/s]


Validation Accuracy: 98.84%


100%|██████████| 938/938 [00:16<00:00, 57.43it/s]


Epoch [7/10], Loss: 0.0143


100%|██████████| 157/157 [00:02<00:00, 59.19it/s]


Validation Accuracy: 98.78%


100%|██████████| 938/938 [00:16<00:00, 55.43it/s]


Epoch [8/10], Loss: 0.0135


100%|██████████| 157/157 [00:02<00:00, 66.97it/s]


Validation Accuracy: 98.88%


100%|██████████| 938/938 [00:16<00:00, 57.55it/s]


Epoch [9/10], Loss: 0.0104


100%|██████████| 157/157 [00:02<00:00, 68.47it/s]


Validation Accuracy: 99.03%


100%|██████████| 938/938 [00:17<00:00, 54.91it/s]


Epoch [10/10], Loss: 0.0085


100%|██████████| 157/157 [00:02<00:00, 69.58it/s]

Validation Accuracy: 98.97%



