In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms

from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter
from IPython.display import display, clear_output
import pandas as pd
import time
import json

from itertools import product
from collections import namedtuple
from collections import OrderedDict

In [2]:
class Network(torch.nn.Module):
    """Small CNN: two conv + max-pool stages followed by three linear layers.

    The flattening step expects 12 channels of 4x4 features per sample after
    the second pooling stage, which is what a 1x28x28 input produces
    (28 -> 24 -> 12 -> 8 -> 4).
    """

    def __init__(self):
        super().__init__()

        # Convolutional feature extractor.
        self.conv1 = torch.nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5)
        self.conv2 = torch.nn.Conv2d(in_channels=6, out_channels=12, kernel_size=5)

        # Fully connected classifier head.
        self.fc1 = torch.nn.Linear(in_features=12*4*4, out_features=120)
        self.fc2 = torch.nn.Linear(in_features=120, out_features=60)
        self.out = torch.nn.Linear(in_features=60, out_features=10)

    def forward(self, t):
        """Return the 10 raw class scores per sample; no softmax is applied."""
        # Two conv -> ReLU -> 2x2 max-pool stages.
        t = F.max_pool2d(F.relu(self.conv1(t)), kernel_size=2, stride=2)
        t = F.max_pool2d(F.relu(self.conv2(t)), kernel_size=2, stride=2)

        # Flatten to rows of fc1.in_features (= 12*4*4) values.
        t = t.reshape(-1, self.fc1.in_features)

        # Two hidden linear layers with ReLU, then the linear output layer.
        t = F.relu(self.fc1(t))
        t = F.relu(self.fc2(t))
        return self.out(t)

In [3]:
# FashionMNIST training split stored under ./data; ToTensor is the only transform.
train_set = torchvision.datasets.FashionMNIST(
    root='./data',
    train=True,
    download=True,
    transform=transforms.Compose([transforms.ToTensor()]),
)

In [4]:
# Load the entire training set as one batch so the statistics cover every pixel.
loader = DataLoader(train_set, batch_size=len(train_set), num_workers=1)
data = next(iter(loader))
mean, std = data[0].mean(), data[0].std()

In [5]:
# Show the computed mean and standard deviation as this cell's output.
mean, std

(tensor(0.2860), tensor(0.3530))

In [6]:
# Same training split, with Normalize(mean, std) applied after ToTensor.
train_set_normal = torchvision.datasets.FashionMNIST(
    root='./data',
    train=True,
    download=True,
    transform=transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(mean, std),
    ]),
)

In [7]:
# Datasets keyed by name so they can be selected with a string.
train_sets = dict(not_normal=train_set, normal=train_set_normal)

In [8]:
class RunBuilder():
    """Expands a parameter grid into one named tuple per value combination."""

    @staticmethod
    def get_runs(params):
        """Return a list of ``Run`` tuples covering the Cartesian product.

        params: mapping from parameter name to an iterable of candidate
            values; the names become the fields of each ``Run``.
        """
        Run = namedtuple('Run', params.keys())
        return [Run(*combo) for combo in product(*params.values())]

In [9]:
class RunManager():
    """Collects per-epoch training statistics and logs them to TensorBoard.

    Intended call order for each run:
    ``begin_run`` -> (``begin_epoch`` -> ``track_loss`` / ``track_num_correct``
    per batch -> ``end_epoch``) per epoch -> ``end_run``; ``save`` writes
    everything collected so far to disk.
    """

    def __init__(self):
        # Per-epoch accumulators, reset by begin_epoch().
        self.epoch_count = 0
        self.epoch_loss = 0
        self.epoch_num_correct = 0
        self.epoch_start_time = 0

        # Per-run state; run_data keeps one result row per finished epoch
        # across all runs.
        self.run_params = None
        self.run_count = 0
        self.run_data = []
        self.run_start_time = None

        # Objects belonging to the run in progress.
        self.network = None
        self.loader = None
        self.tb = None

    def begin_run(self, run, network, loader):
        """Start a run: remember its objects and open a TensorBoard writer.

        run: named tuple of run parameters; used in the writer's comment and
            copied into every result row. Its optional ``device`` field
            (default ``'cpu'``) decides where the sample batch is moved
            before the graph is traced.
        network: the model being trained.
        loader: the DataLoader that feeds the run.
        """
        self.run_start_time = time.time()

        self.run_params = run
        self.run_count += 1

        self.network = network
        self.loader = loader
        self.tb = SummaryWriter(comment = f'-{run}')

        # Log the first batch as an image grid and use it to trace the graph.
        images, labels = next(iter(self.loader))
        grid = torchvision.utils.make_grid(images)

        self.tb.add_image('images', grid)
        self.tb.add_graph(self.network, images.to(getattr(run, 'device', 'cpu')))

    def end_run(self):
        """Close the TensorBoard writer and reset the epoch counter."""
        self.tb.close()
        self.epoch_count = 0

    def begin_epoch(self):
        """Start timing a new epoch and clear the per-epoch accumulators."""
        self.epoch_start_time  = time.time()

        self.epoch_count += 1
        self.epoch_loss = 0
        self.epoch_num_correct = 0

    def end_epoch(self):
        """Log the epoch's metrics, append a result row and redraw the table."""
        epoch_duration = time.time() - self.epoch_start_time
        run_duration = time.time() - self.run_start_time

        # Both metrics are normalized by the size of the full dataset.
        loss = self.epoch_loss / len(self.loader.dataset)
        accuracy = self.epoch_num_correct / len(self.loader.dataset)

        self.tb.add_scalar('Loss', loss, self.epoch_count)
        self.tb.add_scalar('Accuracy', accuracy, self.epoch_count)

        for name, param in self.network.named_parameters():
            self.tb.add_histogram(name, param, self.epoch_count)
            # A parameter that took no part in backward() has grad None;
            # there is nothing to plot for it.
            if param.grad is not None:
                self.tb.add_histogram(f'{name}.grad', param.grad, self.epoch_count)

        results = OrderedDict()
        results["run"] = self.run_count
        results["epoch"] = self.epoch_count
        results['loss'] = loss
        results["accuracy"] = accuracy
        results['epoch duration'] = epoch_duration
        results['run duration'] = run_duration
        # Append the run's hyper-parameters as extra columns.
        results.update(self.run_params._asdict())
        self.run_data.append(results)

        df = pd.DataFrame(self.run_data)

        # Replace the previously displayed table instead of stacking outputs.
        clear_output(wait=True)
        display(df)

    def track_loss(self, loss, batch_size=None):
        """Add one batch's loss to the epoch total, weighted by batch size.

        loss: scalar tensor; assumed to be the mean loss over the batch.
        batch_size: number of samples in this batch. Defaults to
            ``self.loader.batch_size``; pass the real size when a batch can
            be shorter than that (e.g. the last batch of an epoch) so it is
            not over-weighted.
        """
        if batch_size is None:
            batch_size = self.loader.batch_size
        self.epoch_loss += loss.item() * batch_size

    def track_num_correct(self, preds, labels):
        """Add the number of correct predictions in this batch to the epoch total."""
        self.epoch_num_correct += self._get_num_correct(preds, labels)

    @torch.no_grad()
    def _get_num_correct(self, preds, labels):
        """Count rows of ``preds`` whose arg-max over dim 1 equals the label."""
        return preds.argmax(dim=1).eq(labels).sum().item()

    def save(self, fileName):
        """Write all result rows to ``<fileName>.csv`` and ``<fileName>.json``."""
        pd.DataFrame(self.run_data).to_csv(f'{fileName}.csv')
        with open(f'{fileName}.json', 'w', encoding='utf-8') as f:
            # default=str keeps the dump from failing on run parameters that
            # are not JSON-native (e.g. a torch.device object).
            json.dump(self.run_data, f, ensure_ascii=False, indent=4, default=str)

In [10]:
# Hyper-parameter grid: each key maps to the list of values to try.
params = OrderedDict(
    lr=[.01],
    batch_size=[100],
    num_workers=[1],
    # Fall back to the CPU so the notebook still runs on machines without CUDA.
    device=['cuda' if torch.cuda.is_available() else 'cpu'],
    trainset=['not_normal', 'normal'],
)

In [12]:
NUM_EPOCHS = 20  # number of epochs trained in every run

# Train one fresh network per parameter combination and record its metrics.
m = RunManager()
for run in RunBuilder.get_runs(params):

    device = torch.device(run.device)
    network = Network().to(device)
    loader = DataLoader(train_sets[run.trainset], run.batch_size, num_workers = run.num_workers)
    optimizer = optim.Adam(network.parameters(), lr = run.lr)

    m.begin_run(run, network, loader)
    try:
        for epoch in range(NUM_EPOCHS):
            m.begin_epoch()
            for batch in loader:
                images = batch[0].to(device)
                labels = batch[1].to(device)
                preds = network(images)
                loss = F.cross_entropy(preds, labels)
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

                m.track_loss(loss)
                m.track_num_correct(preds, labels)

            m.end_epoch()
    finally:
        # Close the TensorBoard writer even if training is interrupted or
        # raises, so the events logged so far are not left in an open writer.
        m.end_run()
m.save('results')

Unnamed: 0,run,epoch,loss,accuracy,epoch duration,run duration,lr,batch_size,num_workers,device,trainset
0,1,1,0.5298,0.8007,6.375949,11.701911,0.01,100,1,cuda,not_normal
1,1,2,0.371607,0.863383,6.328078,18.17161,0.01,100,1,cuda,not_normal
2,1,3,0.341665,0.874633,6.438783,24.722093,0.01,100,1,cuda,not_normal
3,1,4,0.329704,0.876933,6.513582,31.355355,0.01,100,1,cuda,not_normal
4,1,5,0.321578,0.880117,6.552477,38.025517,0.01,100,1,cuda,not_normal
5,1,6,0.313976,0.88415,6.494633,44.642822,0.01,100,1,cuda,not_normal
6,1,7,0.314009,0.885483,6.512584,51.270099,0.01,100,1,cuda,not_normal
7,1,8,0.3038,0.8892,6.353011,57.738801,0.01,100,1,cuda,not_normal
8,1,9,0.292876,0.892667,6.531533,64.387022,0.01,100,1,cuda,not_normal
9,1,10,0.293074,0.8929,6.607332,71.10805,0.01,100,1,cuda,not_normal


In [13]:
# Rank every recorded epoch, highest accuracy first.
pd.DataFrame(m.run_data).sort_values(by='accuracy', ascending=False)

Unnamed: 0,run,epoch,loss,accuracy,epoch duration,run duration,lr,batch_size,num_workers,device,trainset
19,1,20,0.270244,0.902533,6.45075,137.923374,0.01,100,1,cuda,not_normal
18,1,19,0.269443,0.901733,6.792835,131.35095,0.01,100,1,cuda,not_normal
17,1,18,0.280428,0.8979,6.839709,124.437438,0.01,100,1,cuda,not_normal
16,1,17,0.278511,0.897783,6.988313,117.47406,0.01,100,1,cuda,not_normal
15,1,16,0.278876,0.89745,6.468701,110.364072,0.01,100,1,cuda,not_normal
12,1,13,0.281871,0.896167,6.356004,90.529115,0.01,100,1,cuda,not_normal
13,1,14,0.281429,0.8958,6.62628,97.274077,0.01,100,1,cuda,not_normal
14,1,15,0.290324,0.894367,6.387918,103.777686,0.01,100,1,cuda,not_normal
39,2,20,0.295287,0.893233,8.737634,175.424886,0.01,100,1,cuda,normal
9,1,10,0.293074,0.8929,6.607332,71.10805,0.01,100,1,cuda,not_normal
