# Import Modules

In [None]:
import torch
from torch.autograd import Variable
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset

from sklearn.datasets import load_digits
from sklearn import datasets, model_selection

from matplotlib import pyplot as plt
from matplotlib import cm
import urllib.request
import pandas as pd

%matplotlib inline

In [None]:
from scipy.io import loadmat
# Download the MNIST .mat file from a mirror and save it next to the notebook.
mnist_alternative_url = "https://github.com/amplab/datascience-sp14/raw/master/lab7/mldata/mnist-original.mat"
mnist_path = "./mnist-original.mat"
# Both the HTTP response and the output file are context-managed so the
# connection and the file handle are released even if the read/write fails.
with urllib.request.urlopen(mnist_alternative_url) as response, open(mnist_path, "wb") as f:
    content = response.read()
    f.write(content)

# Re-read the saved file and repackage it into a plain dict.
mnist_raw = loadmat(mnist_path)
mnist = {
    # Transposed relative to the stored layout; presumably so that each row
    # is one sample -- TODO confirm against the .mat file.
    "data": mnist_raw["data"].T,
    # First row of the stored "label" array.
    "target": mnist_raw["label"][0],
    "COL_NAMES": ["label", "data"],
    "DESCR": "mldata.org dataset: mnist-original",
}
print("Success!")

In [None]:
# Divide the raw pixel values by 255 (assumes 8-bit intensities -- TODO confirm).
# Only the scale is normalised here; standardising by the std as well would be better.
mnist_data = mnist["data"] / 255

# Show the scaled data as a table (cell output).
pd.DataFrame(mnist_data)

In [None]:
# Optional visual sanity check, disabled by wrapping it in a string literal.
# When enabled it would reshape the first row of mnist_data to 28x28 and show
# it as a grayscale image.
''' # check dataset
plt.imshow(mnist_data[0].reshape(28, 28), cmap=cm.gray_r)
plt.show()
'''

In [None]:
# Pull the label vector out of the dataset dict and echo it as the cell output.
mnist_label = mnist["target"]
mnist_label

In [None]:
# TODO: derive the sizes from the data (e.g. with len(...)) instead of hard-coding them.
train_size = 60000
# TODO: use model_selection to obtain a validation split as well.
test_size = 10000

# NOTE(review): no random_state is passed, so pass one if a reproducible
# split is wanted.
train_X, test_X, train_Y, test_Y = model_selection.train_test_split(
    mnist_data,
    mnist_label,
    train_size=train_size,
    test_size=test_size,
)

In [None]:
# Prefer the first CUDA device when one is available, otherwise stay on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

# Features become float32 tensors, labels become int64 tensors, all on `device`.
train_X = torch.from_numpy(train_X).to(device=device, dtype=torch.float32)
train_Y = torch.from_numpy(train_Y).to(device=device, dtype=torch.int64)

test_X = torch.from_numpy(test_X).to(device=device, dtype=torch.float32)
test_Y = torch.from_numpy(test_Y).to(device=device, dtype=torch.int64)

# Quick shape check of the training tensors.
for tensor in (train_X, train_Y):
    print(tensor.shape)

In [None]:
# Pair features with labels and serve them in shuffled mini-batches of 512.
train = TensorDataset(train_X, train_Y)
train_loader = DataLoader(
    train,
    batch_size=512,
    shuffle=True,
)

In [None]:
class MLPSGD(nn.Module):
    """Four-layer MLP classifier for inputs that flatten to 784 features.

    Architecture: three hidden blocks of
    ``Linear -> BatchNorm1d -> ReLU -> Dropout`` (512, 256 and 128 units),
    followed by a ``Linear(128, 10)`` output layer and ``log_softmax``.

    Only layers that ``forward`` actually uses are registered, so
    ``model.parameters()`` contains no dead weights.
    """

    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(784, 512)
        self.fc2 = nn.Linear(512, 256)
        self.fc3 = nn.Linear(256, 128)
        self.fc4 = nn.Linear(128, 10)
        # Drop probability shared by all three hidden blocks.
        self.dropout_prob = 0.5
        self.batch_norm1 = nn.BatchNorm1d(512)
        self.batch_norm2 = nn.BatchNorm1d(256)
        self.batch_norm3 = nn.BatchNorm1d(128)

    def forward(self, x):
        """Return per-class log-probabilities of shape ``(N, 10)``.

        ``x`` may have any shape whose elements regroup into rows of
        28 * 28 = 784 values; it is flattened with ``view(-1, 784)``.
        """
        x = x.view(-1, 28 * 28)

        hidden_blocks = (
            (self.fc1, self.batch_norm1),
            (self.fc2, self.batch_norm2),
            (self.fc3, self.batch_norm3),
        )
        for linear, batch_norm in hidden_blocks:
            x = F.relu(batch_norm(linear(x)))
            # Functional dropout must be told the mode explicitly; tying it to
            # self.training makes it a no-op after model.eval().
            x = F.dropout(x, p=self.dropout_prob, training=self.training)

        x = self.fc4(x)
        return F.log_softmax(x, dim=1)

# Instantiate the network and move it to the same device as the data tensors.
# Using `device` instead of .cuda() keeps the notebook runnable without a GPU.
model = MLPSGD()
model.to(device)

In [None]:
# Summed training loss, recorded once every 100 epochs.
allloss = []
criterion = nn.CrossEntropyLoss()

optimizer = optim.SGD(model.parameters(), lr=0.01)

# Make sure dropout and batch norm are in training mode, even if this cell is
# re-run after the model was switched to eval mode elsewhere.
model.train()

for epoch in range(1000):  # 20 (presumably a shorter setting for quick runs)

    # Sum of the per-batch losses over this epoch (not a per-sample mean).
    total_loss = 0.0

    for train_x, train_y in train_loader:
        optimizer.zero_grad()

        output = model(train_x)
        loss = criterion(output, train_y)

        loss.backward()
        optimizer.step()

        # .item() converts the scalar loss tensor to a plain Python float.
        total_loss += loss.item()

    if (epoch + 1) % 100 == 0:
        print(epoch + 1, total_loss)
        allloss.append(total_loss)


In [None]:
# allloss holds the summed *training* loss, appended once every 100 epochs,
# so the labels describe it as such rather than as test accuracy or loss.
plt.plot(allloss, label="training loss (summed over batches)")
plt.title("training loss")
plt.xlabel("checkpoint (every 100 epochs)")
plt.legend()
plt.show()

In [None]:
# Evaluate on the held-out split.
test_x, test_y = test_X, test_Y

# Switch to eval mode so dropout is disabled and batch norm uses its running
# statistics instead of the test batch; restore the previous mode afterwards.
was_training = model.training
model.eval()
try:
    # No gradients are needed for inference.
    with torch.no_grad():
        _, result = torch.max(model(test_x), dim=1)
finally:
    model.train(was_training)

# Fraction of test samples whose predicted class matches the label.
accuracy = (result == test_y).float().mean().item()

accuracy