In [112]:
import torch
import torchvision
import torchvision.datasets as datasets
import torchvision.transforms as transforms
import torch.optim as optim
import torch.nn as nn
import torch.nn.functional as F
#import matplotlib.pyplot as plt
import numpy as np

In [113]:
# Choose the compute device: CUDA when PyTorch reports it as available, CPU otherwise.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(device)

# When running on CUDA, also report the name of device index 0.
if device.type == 'cuda':
    print(torch.cuda.get_device_name(0))

cpu


In [114]:
# Number of target classes.
CLASSES = 10

# Resolve the compute device again so this cell can be run on its own.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
device

device(type='cpu')

In [115]:
class Net(nn.Module):
    """Fully convolutional classifier for single-channel images.

    The network is a stack of twelve convolutions (each followed by ReLU),
    two max-pooling stages, three dropout stages and a final global average
    pool. It produces 10 values per sample.

    Input:  tensor of shape (batch, 1, H, W). The notebook feeds 227x227 images.
    Output: tensor of shape (batch, 10) holding the per-class scores. They are
            non-negative because the last convolution is followed by ReLU.
    """

    def __init__(self):
        super().__init__()

        # Parameter-free local response normalisation. The same module is
        # reused on the pre-activation outputs of conv3 and conv6.
        self.local_response = nn.LocalResponseNorm(size=5, alpha=0.0001, beta=0.75, k=2)

        # Layer 1: 1 -> 96 channels, 11x11 kernel, stride 4, no padding, no pooling.
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=96, kernel_size=11, stride=4, padding=0)

        # Layer 2: 96 -> 96 channels, 1x1 kernel.
        # NOTE(review): padding=2 on a 1x1 convolution grows the feature map by a
        # border of bias-only values; confirm this is intended.
        self.conv2 = nn.Conv2d(in_channels=96, out_channels=96, kernel_size=1, padding=2)

        # Layer 3: 96 -> 96 channels, 1x1 kernel, then LRN, ReLU, 3x3/2 max-pool, dropout.
        self.conv3 = nn.Conv2d(in_channels=96, out_channels=96, kernel_size=1, stride=1, padding=0)
        self.pool_layer3 = nn.MaxPool2d(kernel_size=3, stride=2)

        # Layer 4: 96 -> 256 channels, 11x11 kernel, stride 4, padding 2.
        self.conv4 = nn.Conv2d(in_channels=96, out_channels=256, kernel_size=11, stride=4, padding=2)

        # Layer 5: 256 -> 256 channels, 1x1 kernel.
        self.conv5 = nn.Conv2d(in_channels=256, out_channels=256, kernel_size=1, stride=1)

        # Layer 6: 256 -> 384 channels, 1x1 kernel, then LRN, ReLU, 3x3/2 max-pool, dropout.
        # NOTE(review): the original layer description asked for 256 kernels here;
        # the widening to 384 is kept so existing parameter shapes do not change.
        self.conv6 = nn.Conv2d(in_channels=256, out_channels=384, kernel_size=1, stride=1)
        self.pool_layer6 = nn.MaxPool2d(kernel_size=3, stride=2)

        # Layer 7: 384 -> 384 channels, 3x3 kernel, stride 1, padding 1.
        self.conv7 = nn.Conv2d(in_channels=384, out_channels=384, kernel_size=3, stride=1, padding=1)

        # Layer 8: 384 -> 384 channels, 1x1 kernel.
        self.conv8 = nn.Conv2d(in_channels=384, out_channels=384, kernel_size=1, stride=1)

        # Layer 9: 384 -> 10 channels, 1x1 kernel, followed by ReLU and dropout.
        # NOTE(review): the original layer description asked for 384 kernels here
        # (with the reduction to 10 in layer 10); kept as-is to preserve shapes.
        self.conv9 = nn.Conv2d(in_channels=384, out_channels=10, kernel_size=1)

        # Layer 10: 10 -> 10 channels, 3x3 kernel, stride 1, padding 1.
        self.conv10 = nn.Conv2d(in_channels=10, out_channels=10, kernel_size=3, stride=1, padding=1)

        # Layer 11: 10 -> 10 channels, 1x1 kernel.
        self.conv11 = nn.Conv2d(in_channels=10, out_channels=10, kernel_size=1, stride=1)

        # Layer 12: 10 -> 10 channels, 1x1 kernel, then global average pooling to 1x1.
        self.conv12 = nn.Conv2d(in_channels=10, out_channels=10, kernel_size=1, stride=1)
        self.adapt_avg_pool = nn.AdaptiveAvgPool2d((1, 1))

    def forward(self, x):
        """Run the network on a batch and return a (batch, 10) tensor of scores."""
        # Layers 1-2: convolution + ReLU.
        x = F.relu(self.conv1(x))
        x = F.relu(self.conv2(x))

        # Layer 3: convolution -> LRN -> ReLU -> max-pool -> dropout.
        x = self.pool_layer3(F.relu(self.local_response(self.conv3(x))))
        # F.dropout defaults to training=True, which would keep dropping
        # activations under model.eval(); tie it to the module's mode instead.
        x = F.dropout(x, p=0.5, training=self.training)

        # Layers 4-5: convolution + ReLU.
        x = F.relu(self.conv4(x))
        x = F.relu(self.conv5(x))

        # Layer 6: convolution -> LRN -> ReLU -> max-pool -> dropout.
        x = self.pool_layer6(F.relu(self.local_response(self.conv6(x))))
        x = F.dropout(x, p=0.5, training=self.training)

        # Layers 7-8: convolution + ReLU.
        x = F.relu(self.conv7(x))
        x = F.relu(self.conv8(x))

        # Layer 9: convolution + ReLU + dropout.
        x = F.relu(self.conv9(x))
        x = F.dropout(x, p=0.5, training=self.training)

        # Layers 10-12: convolution + ReLU.
        x = F.relu(self.conv10(x))
        x = F.relu(self.conv11(x))
        x = F.relu(self.conv12(x))

        # Global average pool over the ReLU'd layer-12 output. conv12 must be
        # applied exactly once, so it is not called again here.
        x = self.adapt_avg_pool(x)

        # Flatten every dimension except the batch dimension: (B, 10, 1, 1) -> (B, 10).
        return torch.flatten(x, 1)

In [116]:
# Preprocessing applied to every image, in order: resize to 227x227, convert
# to a tensor, then normalise the single channel.
# NOTE(review): 0.1307 / 0.3081 are presumably the MNIST mean / std - confirm.
transform_conf = transforms.Compose(
    [
        transforms.Resize(size=(227, 227)),
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.1307,), std=(0.3081,)),
    ]
)

In [117]:
# Number of samples per mini-batch.
BATCH_SIZE = 16

# MNIST training split, downloaded into the given root directory if missing
# and preprocessed with transform_conf.
train_dataset = datasets.MNIST(
    root='/users/zlinsco/data',
    train=True,
    transform=transform_conf,
    download=True,
)

# MNIST test split, same root directory and preprocessing.
test_dataset = datasets.MNIST(
    root='/users/zlinsco/data',
    train=False,
    transform=transform_conf,
    download=True,
)

In [118]:
# Wrap both splits in shuffling DataLoaders that share one batch size.
train_loader, test_loader = (
    torch.utils.data.DataLoader(dataset=split, batch_size=BATCH_SIZE, shuffle=True)
    for split in (train_dataset, test_dataset)
)

In [119]:
# Build the network and move its parameters onto the selected device.
model = Net()
model = model.to(device)

# Adam over all model parameters with a learning rate of 1e-4.
optimizer = optim.Adam(model.parameters(), lr=1e-4)

# Cross-entropy loss used during training.
loss_fn = nn.CrossEntropyLoss()

In [120]:
def train(model, device, train_loader, optimizer, epochs, criterion=None, log_interval=2):
    """Train ``model`` for one pass over ``train_loader``.

    Args:
        model: module to optimise; it is switched to training mode.
        device: device the batches are moved to before the forward pass.
        train_loader: iterable of ``(img, classes)`` batches that also supports
            ``len()`` and exposes ``.dataset`` (used for progress reporting).
        optimizer: optimiser that updates the parameters of ``model``.
        epochs: number of the current epoch; only used in the progress message.
        criterion: callable ``(output, classes) -> loss``. When omitted, the
            notebook-level ``loss_fn`` is used.
        log_interval: print progress every this many batches; a falsy value
            disables progress output.
    """
    print("inside train")

    if criterion is None:
        # Fall back to the loss object defined at notebook level.
        criterion = loss_fn

    model.train()

    n_samples = len(train_loader.dataset)
    n_batches = len(train_loader)

    # Anomaly detection (torch.autograd.set_detect_anomaly) is intentionally not
    # enabled here: it is a debugging aid that slows every backward pass and was
    # previously being re-enabled on each batch.
    for batch_ids, (img, classes) in enumerate(train_loader):
        img = img.to(device)
        # Move the labels and make them int64 in one step; the loss expects long targets.
        classes = classes.to(device=device, dtype=torch.long)

        optimizer.zero_grad()
        output = model(img)
        loss = criterion(output, classes)

        loss.backward()
        optimizer.step()

        # Progress reporting belongs inside the batch loop; outside it, it would
        # fire at most once per epoch and fail on an empty loader.
        if log_interval and (batch_ids + 1) % log_interval == 0:
            print("Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}".format(
                epochs, batch_ids * len(img), n_samples,
                100. * batch_ids / n_batches, loss.item()))

In [121]:
def test(model, device, test_loader):
    
    model.eval()
    test_loss=0
    correct=0
    
    with torch.no_grad():
        for img, classes in test_loader:
            img, classes = img.to(device), classes.to(device)
            y_hat = model(img)
            test_loss += F.nll_loss(y_hat,classes,reduction='sum').item()
            _,y_pred = torch.max(y_hat,1)
            correct += (y_pred == classes).sum().item()
        test_loss/=len(test_dataset)
        print("\n Test set: Avarage loss: {:.0f},Accuracy:{}/{} ({:.0f}%)\n".format(
            test_loss,correct,len(test_dataset),100.*correct/len(test_dataset)))
        print('='*30)

In [None]:
if __name__=='__main__':
    seed=42
    EPOCHS=1

    # Apply the seed: it was previously assigned but never used, so runs were
    # not repeatable. This seeds PyTorch's global RNG from here on (dropout
    # masks and DataLoader shuffling); objects created before this point, such
    # as already-initialised model weights, are not affected.
    torch.manual_seed(seed)

    # One training pass followed by one evaluation pass per epoch.
    for epoch in range(1,EPOCHS+1):
        train(model,device,train_loader,optimizer,epoch)
        test(model,device,test_loader)

inside train
