In [None]:
import urllib.request
from pathlib import Path

import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.nn.init as init 
import torch.optim as optim
import torchvision 
from matplotlib import cm
from matplotlib import pyplot as plt
from scipy.io import loadmat
from torch.autograd import Variable
from torch.utils.data import DataLoader, TensorDataset
from torchvision import transforms, datasets
# Kept after the torchvision import on purpose: both expose `datasets`, and the
# name must keep resolving to sklearn.datasets as it did before.
from sklearn.datasets import load_digits
from sklearn import datasets, model_selection

%matplotlib inline

In [None]:
# Run configuration: compute device and number of training epochs.
DEVICE = torch.device('cuda:0') if torch.cuda.is_available() else torch.device('cpu')
EPOCHS = 7 #30


# Fetch the MNIST .mat file once and reuse the local copy on later runs.
mnist_alternative_url = "https://github.com/amplab/datascience-sp14/raw/master/lab7/mldata/mnist-original.mat"
mnist_path = "./mnist-original.mat"

mnist_file = Path(mnist_path)
# A zero-byte file is treated as a failed earlier download and fetched again.
if not mnist_file.exists() or mnist_file.stat().st_size == 0:
    # Read the whole body before touching the disk so a failed request cannot
    # leave an empty file behind; the context manager closes the connection.
    with urllib.request.urlopen(mnist_alternative_url) as response:
        content = response.read()
    mnist_file.write_bytes(content)

mnist_raw = loadmat(mnist_path)
mnist = {
    # Transposed so that the first axis indexes samples (row 0 is later
    # reshaped to 28x28 when the first digit is plotted).
    "data": mnist_raw["data"].T,
    "target": mnist_raw["label"][0],
    "COL_NAMES": ["label", "data"],
    "DESCR": "mldata.org dataset: mnist-original",
}
print("Success!")

In [None]:
# Scale every pixel by 1/255 (presumably 0-255 intensities -> [0, 1]; confirm
# against the contents of the .mat file).
mnist_data = mnist['data'] / 255

# Preview only the first few rows instead of wrapping and rendering the whole
# array as a DataFrame.
pd.DataFrame(mnist_data[:5])

In [None]:
# Draw the first sample as a 28x28 image using the reversed gray colormap.
first_digit = mnist_data[0].reshape(28, 28)
plt.imshow(first_digit, cmap=cm.gray_r)
plt.show()

In [None]:
# Target vector from the loaded dataset; it is split together with
# `mnist_data` further down, so the two are expected to have matching lengths.
mnist_label = mnist["target"]
mnist_label

In [None]:
train_size = 60000
test_size = 10000
# Fixed seed so the shuffle-and-split is identical on every Restart & Run All;
# without it each run trains and evaluates on a different partition.
split_seed = 42

train_X, test_X, train_Y, test_Y = model_selection.train_test_split(
    mnist_data,
    mnist_label,
    train_size=train_size,
    test_size=test_size,
    random_state=split_seed,
)

In [None]:
# Reuse the notebook-wide DEVICE instead of deriving the same device a second
# time, so there is a single source of truth for where tensors live.
device = DEVICE

# Features become float32 tensors and labels int64 tensors (the dtype the
# classification loss expects for class indices), both placed on `device`.
train_X = torch.from_numpy(train_X).float().to(device)
train_Y = torch.from_numpy(train_Y).long().to(device)


test_X = torch.from_numpy(test_X).float().to(device)
test_Y = torch.from_numpy(test_Y).long().to(device)

print(train_X.shape)
print(train_Y.shape)

In [None]:
BATCH_SIZE = 512

# Named *_dataset (not `train` / `test`) so that re-running this cell can
# never overwrite the `train()` function defined later in the notebook.
train_dataset = TensorDataset(train_X, train_Y)
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)

# Evaluation does not benefit from shuffling; a fixed order keeps batches
# identical between runs.
test_dataset = TensorDataset(test_X, test_Y)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)


In [None]:
class MLPAdam(nn.Module):
    """Fully connected classifier for flattened 28x28 inputs.

    Layer widths are 784 -> 512 -> 256 -> 128 -> 10. Each of the three hidden
    stages is Linear -> BatchNorm1d -> ReLU -> dropout; the final Linear layer
    is followed by ``log_softmax`` over the class dimension, so ``forward``
    returns log-probabilities.

    Args:
        dropout_prob: Dropout probability applied after every hidden stage
            while the module is in training mode. Defaults to 0.5.
    """

    def __init__(self, dropout_prob=0.5):
        super(MLPAdam, self).__init__()
        # Only the layers that forward() actually uses are registered; an
        # unused nn.Sequential stack previously added dead parameters here.
        self.fc1 = nn.Linear(784, 512)
        self.fc2 = nn.Linear(512, 256)
        self.fc3 = nn.Linear(256, 128)
        self.fc4 = nn.Linear(128, 10)
        self.dropout_prob = dropout_prob
        self.batch_norm1 = nn.BatchNorm1d(512)
        self.batch_norm2 = nn.BatchNorm1d(256)
        self.batch_norm3 = nn.BatchNorm1d(128)

    def _hidden_stage(self, x, linear, batch_norm):
        """Apply one Linear -> BatchNorm -> ReLU -> dropout stage."""
        x = F.relu(batch_norm(linear(x)))
        # Dropout is active only in training mode (self.training).
        return F.dropout(x, p=self.dropout_prob, training=self.training)

    def forward(self, x):
        """Return log-probabilities of shape (batch, 10).

        The input is flattened to rows of 28 * 28 = 784 values first.
        """
        x = x.view(-1, 28 * 28)
        x = self._hidden_stage(x, self.fc1, self.batch_norm1)
        x = self._hidden_stage(x, self.fc2, self.batch_norm2)
        x = self._hidden_stage(x, self.fc3, self.batch_norm3)
        return F.log_softmax(self.fc4(x), dim=1)




In [None]:
def weight_init(m):
    """Initialize the weight of an ``nn.Linear`` module with Kaiming (He) uniform.

    Intended for ``model.apply``; modules of any other type are left untouched,
    and biases keep their default initialization.
    """
    if isinstance(m, nn.Linear):
        # The init functions already run without gradient tracking, so the
        # parameter is passed directly rather than through `.data`.
        init.kaiming_uniform_(m.weight)


# Seed torch's global RNG so parameter initialization (and the later dropout /
# shuffling draws) are repeatable from a fresh kernel.
torch.manual_seed(42)

model = MLPAdam().to(DEVICE)
model.apply(weight_init)

optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
# The model's forward() ends in log_softmax, so the matching criterion is
# NLLLoss; CrossEntropyLoss would apply log_softmax a second time.
loss_fn = nn.NLLLoss()

In [None]:
def train(model, train_loader, optimizer, log_interval):
    """Run one optimization pass over every batch in ``train_loader``.

    Puts ``model`` in training mode, then for each batch moves the inputs and
    targets to ``DEVICE``, computes the notebook-level ``loss_fn`` and takes
    one optimizer step.

    Args:
        model: Module being trained.
        train_loader: Iterable yielding ``(inputs, targets)`` batches.
        optimizer: Optimizer over the model's parameters.
        log_interval: Accepted for the caller's convenience; this function
            does not currently read it.
    """
    model.train()
    for inputs, targets in train_loader:
        inputs, targets = inputs.to(DEVICE), targets.to(DEVICE)
        optimizer.zero_grad()
        batch_loss = loss_fn(model(inputs), targets)
        batch_loss.backward()
        optimizer.step()

In [None]:
# History of the average test loss; evaluate() appends one value per call.
tl = []


def evaluate(model, test_loader, criterion=None):
    """Compute the per-sample average loss and the accuracy over ``test_loader``.

    Args:
        model: Module to evaluate; it is switched to eval mode.
        test_loader: Iterable yielding ``(inputs, targets)`` batches.
        criterion: Loss callable returning the *mean* loss of a batch. When
            omitted, the notebook-level ``loss_fn`` is used.

    Returns:
        ``(test_loss, test_accuracy)`` where ``test_loss`` is the loss averaged
        over all evaluated samples and ``test_accuracy`` is a percentage.
        ``test_loss`` is also appended to the module-level ``tl`` list.
    """
    if criterion is None:
        criterion = loss_fn

    # Send batches to wherever the model's parameters live; fall back to the
    # notebook-wide DEVICE for a model without parameters.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else DEVICE

    model.eval()
    loss_sum = 0.0
    correct = 0
    seen = 0

    with torch.no_grad():
        for image, label in test_loader:
            image = image.to(device)
            label = label.to(device)
            output = model(image)
            batch_size = label.size(0)
            # The criterion yields a batch mean, so weight it by the batch size
            # before accumulating. Summing the means and dividing by the sample
            # count (as before) under-reports the loss by about the batch size.
            loss_sum += criterion(output, label).item() * batch_size
            prediction = output.argmax(dim=1)
            correct += prediction.eq(label.view(-1)).sum().item()
            seen += batch_size

    test_loss = loss_sum / seen
    tl.append(test_loss)
    test_accuracy = 100. * correct / seen
    return test_loss, test_accuracy

In [None]:
# Train for EPOCHS passes, evaluating on the test loader after each one.
for epoch in range(1, EPOCHS + 1):
    train(model, train_loader, optimizer, log_interval=100)
    test_loss, test_accuracy = evaluate(model, test_loader)
    print("[EPOCH: {}], \tTest Loss: {:.4f}, \tTest Accuracy: {:.2f} %".format(epoch, test_loss, test_accuracy))


# Plot against 1-based positions so the x-axis lines up with the epoch numbers
# printed above (a bare plt.plot(tl) would start the axis at 0).
epoch_axis = range(1, len(tl) + 1)
plt.plot(epoch_axis, tl, label = "test loss")
plt.title('test loss')
plt.xlabel('epoch')
plt.ylabel('loss')
plt.legend()
plt.show()